diff options
author | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-11 10:41:07 +0300 |
---|---|---|
committer | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-13 08:17:18 +0300 |
commit | e09b41010ba33a20a87472ee821fa407a5b8da36 (patch) | |
tree | d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/infiniband | |
parent | f93b97fd65072de626c074dbe099a1fff05ce060 (diff) |
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page.
During the rebasing, the following patch collided:
Force tick interrupt and get rid of softirq magic(I70131fb85).
Collisions have been removed because its logic was found on the
source already.
Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/infiniband')
244 files changed, 12336 insertions, 60465 deletions
diff --git a/kernel/drivers/infiniband/Kconfig b/kernel/drivers/infiniband/Kconfig index b89953149..aa26f3c34 100644 --- a/kernel/drivers/infiniband/Kconfig +++ b/kernel/drivers/infiniband/Kconfig @@ -55,10 +55,7 @@ config INFINIBAND_ADDR_TRANS default y source "drivers/infiniband/hw/mthca/Kconfig" -source "drivers/infiniband/hw/ipath/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/ehca/Kconfig" -source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" diff --git a/kernel/drivers/infiniband/core/Makefile b/kernel/drivers/infiniband/core/Makefile index acf736764..d43a8994a 100644 --- a/kernel/drivers/infiniband/core/Makefile +++ b/kernel/drivers/infiniband/core/Makefile @@ -9,7 +9,8 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ - device.o fmr_pool.o cache.o netlink.o + device.o fmr_pool.o cache.o netlink.o \ + roce_gid_mgmt.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o diff --git a/kernel/drivers/infiniband/core/addr.c b/kernel/drivers/infiniband/core/addr.c index 38339d220..34b1adad0 100644 --- a/kernel/drivers/infiniband/core/addr.c +++ b/kernel/drivers/infiniband/core/addr.c @@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, switch (addr->sa_family) { case AF_INET: - dev = ip_dev_find(&init_net, + dev = ip_dev_find(dev_addr->net, ((struct sockaddr_in *) addr)->sin_addr.s_addr); if (!dev) @@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; - #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (ipv6_chk_addr(&init_net, + for_each_netdev_rcu(dev_addr->net, dev) { + if (ipv6_chk_addr(dev_addr->net, &((struct sockaddr_in6 *) addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.daddr = dst_ip; fl4.saddr = src_ip; fl4.flowi4_oif = addr->bound_dev_if; - rt = ip_route_output_key(&init_net, &fl4); + rt = ip_route_output_key(addr->net, &fl4); if (IS_ERR(rt)) { ret = PTR_ERR(rt); goto out; @@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - dst = ip6_route_output(&init_net, NULL, &fl6); + dst = ip6_route_output(addr->net, NULL, &fl6); if ((ret = dst->error)) goto put; if (ipv6_addr_any(&fl6.saddr)) { - ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, + ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev, &fl6.daddr, 0, &fl6.saddr); if (ret) goto put; @@ -457,8 +456,8 @@ static void resolve_cb(int status, struct sockaddr *src_addr, complete(&((struct resolve_cb_context *)context)->comp); } -int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, - u16 *vlan_id) +int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, + u8 *dmac, u16 *vlan_id, int if_index) { int ret = 0; struct rdma_dev_addr dev_addr; @@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); + dev_addr.bound_dev_if = if_index; + dev_addr.net = &init_net; ctx.addr = &dev_addr; init_completion(&ctx.comp); @@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) rdma_gid2ip(&gid_addr._sockaddr, sgid); memset(&dev_addr, 0, sizeof(dev_addr)); + dev_addr.net = &init_net; ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) return ret; diff --git a/kernel/drivers/infiniband/core/agent.c b/kernel/drivers/infiniband/core/agent.c index f6d29614c..4fa524dfb 100644 --- a/kernel/drivers/infiniband/core/agent.c +++ b/kernel/drivers/infiniband/core/agent.c @@ -54,7 +54,7 @@ static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); static struct ib_agent_port_private * -__ib_get_agent_port(struct ib_device *device, int port_num) +__ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; @@ -67,7 +67,7 @@ __ib_get_agent_port(struct ib_device *device, int port_num) } static struct ib_agent_port_private * -ib_get_agent_port(struct ib_device *device, int port_num) +ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; @@ -78,9 +78,9 @@ ib_get_agent_port(struct ib_device *device, int port_num) return entry; } -void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, - struct ib_wc *wc, struct ib_device *device, - int port_num, int qpn) +void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa) { struct ib_agent_port_private *port_priv; struct ib_mad_agent *agent; @@ -88,7 +88,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, struct ib_ah *ah; struct ib_mad_send_wr_private *mad_send_wr; - if (device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(device)) port_priv = ib_get_agent_port(device, 0); else port_priv = ib_get_agent_port(device, port_num); @@ -106,22 +106,27 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, return; } + if (opa && mad_hdr->base_version != OPA_MGMT_BASE_VERSION) + resp_mad_len = IB_MGMT_MAD_SIZE; + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, - IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_KERNEL); + IB_MGMT_MAD_HDR, + resp_mad_len - IB_MGMT_MAD_HDR, + GFP_KERNEL, + mad_hdr->base_version); if (IS_ERR(send_buf)) { dev_err(&device->dev, "ib_create_send_mad error\n"); goto err1; } - memcpy(send_buf->mad, mad, sizeof *mad); + memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); - mad_send_wr->send_wr.wr.ud.port_num = port_num; + mad_send_wr->send_wr.port_num = port_num; } if (ib_post_send_mad(send_buf, NULL)) { @@ -156,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error1; } - if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_smi(device, port_num)) { /* Obtain send only MAD agent for SMI QP */ port_priv->agent[0] = ib_register_mad_agent(device, port_num, IB_QPT_SMI, NULL, 0, diff --git a/kernel/drivers/infiniband/core/agent.h b/kernel/drivers/infiniband/core/agent.h index 666928700..65f92beda 100644 --- a/kernel/drivers/infiniband/core/agent.h +++ b/kernel/drivers/infiniband/core/agent.h @@ -44,8 +44,8 @@ extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); -extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, - struct ib_wc *wc, struct ib_device *device, - int port_num, int qpn); +extern void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa); #endif /* __AGENT_H_ */ diff --git a/kernel/drivers/infiniband/core/cache.c b/kernel/drivers/infiniband/core/cache.c index 80f6cf244..89bebeada 100644 --- a/kernel/drivers/infiniband/core/cache.c +++ b/kernel/drivers/infiniband/core/cache.c @@ -37,6 +37,8 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/workqueue.h> +#include <linux/netdevice.h> +#include <net/addrconf.h> #include <rdma/ib_cache.h> @@ -47,90 +49,720 @@ struct ib_pkey_cache { u16 table[0]; }; -struct ib_gid_cache { - int table_len; - union ib_gid table[0]; -}; - struct ib_update_work { struct work_struct work; struct ib_device *device; u8 port_num; }; -static inline int start_port(struct ib_device *device) +union ib_gid zgid; +EXPORT_SYMBOL(zgid); + +static const struct ib_gid_attr zattr; + +enum gid_attr_find_mask { + GID_ATTR_FIND_MASK_GID = 1UL << 0, + GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, + GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, +}; + +enum gid_table_entry_props { + GID_TABLE_ENTRY_INVALID = 1UL << 0, + GID_TABLE_ENTRY_DEFAULT = 1UL << 1, +}; + +enum gid_table_write_action { + GID_TABLE_WRITE_ACTION_ADD, + GID_TABLE_WRITE_ACTION_DEL, + /* MODIFY only updates the GID table. Currently only used by + * ib_cache_update. + */ + GID_TABLE_WRITE_ACTION_MODIFY +}; + +struct ib_gid_table_entry { + /* This lock protects an entry from being + * read and written simultaneously. + */ + rwlock_t lock; + unsigned long props; + union ib_gid gid; + struct ib_gid_attr attr; + void *context; +}; + +struct ib_gid_table { + int sz; + /* In RoCE, adding a GID to the table requires: + * (a) Find if this GID is already exists. + * (b) Find a free space. + * (c) Write the new GID + * + * Delete requires different set of operations: + * (a) Find the GID + * (b) Delete it. + * + * Add/delete should be carried out atomically. + * This is done by locking this mutex from multiple + * writers. We don't need this lock for IB, as the MAD + * layer replaces all entries. All data_vec entries + * are locked by this lock. + **/ + struct mutex lock; + struct ib_gid_table_entry *data_vec; +}; + +static int write_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + enum gid_table_write_action action, + bool default_gid) +{ + int ret = 0; + struct net_device *old_net_dev; + unsigned long flags; + + /* in rdma_cap_roce_gid_table, this funciton should be protected by a + * sleep-able lock. + */ + write_lock_irqsave(&table->data_vec[ix].lock, flags); + + if (rdma_cap_roce_gid_table(ib_dev, port)) { + table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + write_unlock_irqrestore(&table->data_vec[ix].lock, flags); + /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by + * RoCE providers and thus only updates the cache. + */ + if (action == GID_TABLE_WRITE_ACTION_ADD) + ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr, + &table->data_vec[ix].context); + else if (action == GID_TABLE_WRITE_ACTION_DEL) + ret = ib_dev->del_gid(ib_dev, port, ix, + &table->data_vec[ix].context); + write_lock_irqsave(&table->data_vec[ix].lock, flags); + } + + old_net_dev = table->data_vec[ix].attr.ndev; + if (old_net_dev && old_net_dev != attr->ndev) + dev_put(old_net_dev); + /* if modify_gid failed, just delete the old gid */ + if (ret || action == GID_TABLE_WRITE_ACTION_DEL) { + gid = &zgid; + attr = &zattr; + table->data_vec[ix].context = NULL; + } + if (default_gid) + table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); + memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); + if (table->data_vec[ix].attr.ndev && + table->data_vec[ix].attr.ndev != old_net_dev) + dev_hold(table->data_vec[ix].attr.ndev); + + table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID; + + write_unlock_irqrestore(&table->data_vec[ix].lock, flags); + + if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) { + struct ib_event event; + + event.device = ib_dev; + event.element.port_num = port; + event.event = IB_EVENT_GID_CHANGE; + + ib_dispatch_event(&event); + } + return ret; +} + +static int add_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + bool default_gid) { + return write_gid(ib_dev, port, table, ix, gid, attr, + GID_TABLE_WRITE_ACTION_ADD, default_gid); +} + +static int modify_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + bool default_gid) { + return write_gid(ib_dev, port, table, ix, gid, attr, + GID_TABLE_WRITE_ACTION_MODIFY, default_gid); +} + +static int del_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix, + bool default_gid) { + return write_gid(ib_dev, port, table, ix, &zgid, &zattr, + GID_TABLE_WRITE_ACTION_DEL, default_gid); +} + +static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, + const struct ib_gid_attr *val, bool default_gid, + unsigned long mask) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; + int i; + + for (i = 0; i < table->sz; i++) { + unsigned long flags; + struct ib_gid_attr *attr = &table->data_vec[i].attr; + + read_lock_irqsave(&table->data_vec[i].lock, flags); + + if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + goto next; + + if (mask & GID_ATTR_FIND_MASK_GID && + memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + goto next; + + if (mask & GID_ATTR_FIND_MASK_NETDEV && + attr->ndev != val->ndev) + goto next; + + if (mask & GID_ATTR_FIND_MASK_DEFAULT && + !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) != + default_gid) + goto next; + + read_unlock_irqrestore(&table->data_vec[i].lock, flags); + return i; +next: + read_unlock_irqrestore(&table->data_vec[i].lock, flags); + } + + return -1; } -static inline int end_port(struct ib_device *device) +static void make_default_gid(struct net_device *dev, union ib_gid *gid) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + addrconf_ifid_eui48(&gid->raw[8], dev); } -int ib_get_cached_gid(struct ib_device *device, - u8 port_num, - int index, - union ib_gid *gid) +int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr) { - struct ib_gid_cache *cache; - unsigned long flags; + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + int ix; int ret = 0; + struct net_device *idev; - if (port_num < start_port(device) || port_num > end_port(device)) + table = ports_table[port - rdma_start_port(ib_dev)]; + + if (!memcmp(gid, &zgid, sizeof(*gid))) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + if (ib_dev->get_netdev) { + idev = ib_dev->get_netdev(ib_dev, port); + if (idev && attr->ndev != idev) { + union ib_gid default_gid; - cache = device->cache.gid_cache[port_num - start_port(device)]; + /* Adding default GIDs in not permitted */ + make_default_gid(idev, &default_gid); + if (!memcmp(gid, &default_gid, sizeof(*gid))) { + dev_put(idev); + return -EPERM; + } + } + if (idev) + dev_put(idev); + } - if (index < 0 || index >= cache->table_len) - ret = -EINVAL; - else - *gid = cache->table[index]; + mutex_lock(&table->lock); - read_unlock_irqrestore(&device->cache.lock, flags); + ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_NETDEV); + if (ix >= 0) + goto out_unlock; + + ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_DEFAULT); + if (ix < 0) { + ret = -ENOSPC; + goto out_unlock; + } + add_gid(ib_dev, port, table, ix, gid, attr, false); + +out_unlock: + mutex_unlock(&table->lock); return ret; } -EXPORT_SYMBOL(ib_get_cached_gid); -int ib_find_cached_gid(struct ib_device *device, - union ib_gid *gid, - u8 *port_num, - u16 *index) +int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + int ix; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + mutex_lock(&table->lock); + + ix = find_gid(table, gid, attr, false, + GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_NETDEV | + GID_ATTR_FIND_MASK_DEFAULT); + if (ix < 0) + goto out_unlock; + + del_gid(ib_dev, port, table, ix, false); + +out_unlock: + mutex_unlock(&table->lock); + return 0; +} + +int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, + struct net_device *ndev) { - struct ib_gid_cache *cache; + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + int ix; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + mutex_lock(&table->lock); + + for (ix = 0; ix < table->sz; ix++) + if (table->data_vec[ix].attr.ndev == ndev) + del_gid(ib_dev, port, table, ix, false); + + mutex_unlock(&table->lock); + return 0; +} + +static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, + union ib_gid *gid, struct ib_gid_attr *attr) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; unsigned long flags; - int p, i; - int ret = -ENOENT; - *port_num = -1; + table = ports_table[port - rdma_start_port(ib_dev)]; + + if (index < 0 || index >= table->sz) + return -EINVAL; + + read_lock_irqsave(&table->data_vec[index].lock, flags); + if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) { + read_unlock_irqrestore(&table->data_vec[index].lock, flags); + return -EAGAIN; + } + + memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); + if (attr) { + memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); + if (attr->ndev) + dev_hold(attr->ndev); + } + + read_unlock_irqrestore(&table->data_vec[index].lock, flags); + return 0; +} + +static int _ib_cache_gid_table_find(struct ib_device *ib_dev, + const union ib_gid *gid, + const struct ib_gid_attr *val, + unsigned long mask, + u8 *port, u16 *index) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + u8 p; + int local_index; + + for (p = 0; p < ib_dev->phys_port_cnt; p++) { + table = ports_table[p]; + local_index = find_gid(table, gid, val, false, mask); + if (local_index >= 0) { + if (index) + *index = local_index; + if (port) + *port = p + rdma_start_port(ib_dev); + return 0; + } + } + + return -ENOENT; +} + +static int ib_cache_gid_find(struct ib_device *ib_dev, + const union ib_gid *gid, + struct net_device *ndev, u8 *port, + u16 *index) +{ + unsigned long mask = GID_ATTR_FIND_MASK_GID; + struct ib_gid_attr gid_attr_val = {.ndev = ndev}; + + if (ndev) + mask |= GID_ATTR_FIND_MASK_NETDEV; + + return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, + mask, port, index); +} + +int ib_find_cached_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + u8 port, struct net_device *ndev, + u16 *index) +{ + int local_index; + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + unsigned long mask = GID_ATTR_FIND_MASK_GID; + struct ib_gid_attr val = {.ndev = ndev}; + + if (port < rdma_start_port(ib_dev) || + port > rdma_end_port(ib_dev)) + return -ENOENT; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + if (ndev) + mask |= GID_ATTR_FIND_MASK_NETDEV; + + local_index = find_gid(table, gid, &val, false, mask); + if (local_index >= 0) { + if (index) + *index = local_index; + return 0; + } + + return -ENOENT; +} +EXPORT_SYMBOL(ib_find_cached_gid_by_port); + +/** + * ib_find_gid_by_filter - Returns the GID table index where a specified + * GID value occurs + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value could be + * searched. + * @filter: The filter function is executed on any matching GID in the table. + * If the filter function returns true, the corresponding index is returned, + * otherwise, we continue searching the GID table. It's guaranteed that + * while filter is executed, ndev field is valid and the structure won't + * change. filter is executed in an atomic context. filter must not be NULL. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_cache_gid_find_by_filter() searches for the specified GID value + * of which the filter function returns true in the port's GID table. + * This function is only supported on RoCE ports. + * + */ +static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, + const union ib_gid *gid, + u8 port, + bool (*filter)(const union ib_gid *, + const struct ib_gid_attr *, + void *), + void *context, + u16 *index) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + unsigned int i; + bool found = false; + + if (!ports_table) + return -EOPNOTSUPP; + + if (port < rdma_start_port(ib_dev) || + port > rdma_end_port(ib_dev) || + !rdma_protocol_roce(ib_dev, port)) + return -EPROTONOSUPPORT; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + for (i = 0; i < table->sz; i++) { + struct ib_gid_attr attr; + unsigned long flags; + + read_lock_irqsave(&table->data_vec[i].lock, flags); + if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + goto next; + + if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + goto next; + + memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); + + if (filter(gid, &attr, context)) + found = true; + +next: + read_unlock_irqrestore(&table->data_vec[i].lock, flags); + + if (found) + break; + } + + if (!found) + return -ENOENT; + if (index) - *index = -1; + *index = i; + return 0; +} - read_lock_irqsave(&device->cache.lock, flags); +static struct ib_gid_table *alloc_gid_table(int sz) +{ + unsigned int i; + struct ib_gid_table *table = + kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); + if (!table) + return NULL; - for (p = 0; p <= end_port(device) - start_port(device); ++p) { - cache = device->cache.gid_cache[p]; - for (i = 0; i < cache->table_len; ++i) { - if (!memcmp(gid, &cache->table[i], sizeof *gid)) { - *port_num = p + start_port(device); - if (index) - *index = i; - ret = 0; - goto found; - } + table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL); + if (!table->data_vec) + goto err_free_table; + + mutex_init(&table->lock); + + table->sz = sz; + + for (i = 0; i < sz; i++) + rwlock_init(&table->data_vec[i].lock); + + return table; + +err_free_table: + kfree(table); + return NULL; +} + +static void release_gid_table(struct ib_gid_table *table) +{ + if (table) { + kfree(table->data_vec); + kfree(table); + } +} + +static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table) +{ + int i; + + if (!table) + return; + + for (i = 0; i < table->sz; ++i) { + if (memcmp(&table->data_vec[i].gid, &zgid, + sizeof(table->data_vec[i].gid))) + del_gid(ib_dev, port, table, i, + table->data_vec[i].props & + GID_ATTR_FIND_MASK_DEFAULT); + } +} + +void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, + struct net_device *ndev, + enum ib_cache_gid_default_mode mode) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + union ib_gid gid; + struct ib_gid_attr gid_attr; + struct ib_gid_table *table; + int ix; + union ib_gid current_gid; + struct ib_gid_attr current_gid_attr = {}; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + make_default_gid(ndev, &gid); + memset(&gid_attr, 0, sizeof(gid_attr)); + gid_attr.ndev = ndev; + + mutex_lock(&table->lock); + ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT); + + /* Coudn't find default GID location */ + WARN_ON(ix < 0); + + if (!__ib_cache_gid_get(ib_dev, port, ix, + ¤t_gid, ¤t_gid_attr) && + mode == IB_CACHE_GID_DEFAULT_MODE_SET && + !memcmp(&gid, ¤t_gid, sizeof(gid)) && + !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr))) + goto unlock; + + if ((memcmp(¤t_gid, &zgid, sizeof(current_gid)) || + memcmp(¤t_gid_attr, &zattr, + sizeof(current_gid_attr))) && + del_gid(ib_dev, port, table, ix, true)) { + pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", + ix, gid.raw); + goto unlock; + } + + if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) + if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) + pr_warn("ib_cache_gid: unable to add default gid %pI6\n", + gid.raw); + +unlock: + if (current_gid_attr.ndev) + dev_put(current_gid_attr.ndev); + mutex_unlock(&table->lock); +} + +static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table) +{ + if (rdma_protocol_roce(ib_dev, port)) { + struct ib_gid_table_entry *entry = &table->data_vec[0]; + + entry->props |= GID_TABLE_ENTRY_DEFAULT; + } + + return 0; +} + +static int _gid_table_setup_one(struct ib_device *ib_dev) +{ + u8 port; + struct ib_gid_table **table; + int err = 0; + + table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); + + if (!table) { + pr_warn("failed to allocate ib gid cache for %s\n", + ib_dev->name); + return -ENOMEM; + } + + for (port = 0; port < ib_dev->phys_port_cnt; port++) { + u8 rdma_port = port + rdma_start_port(ib_dev); + + table[port] = + alloc_gid_table( + ib_dev->port_immutable[rdma_port].gid_tbl_len); + if (!table[port]) { + err = -ENOMEM; + goto rollback_table_setup; } + + err = gid_table_reserve_default(ib_dev, + port + rdma_start_port(ib_dev), + table[port]); + if (err) + goto rollback_table_setup; } -found: - read_unlock_irqrestore(&device->cache.lock, flags); - return ret; + ib_dev->cache.gid_cache = table; + return 0; + +rollback_table_setup: + for (port = 0; port < ib_dev->phys_port_cnt; port++) { + cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), + table[port]); + release_gid_table(table[port]); + } + + kfree(table); + return err; +} + +static void gid_table_release_one(struct ib_device *ib_dev) +{ + struct ib_gid_table **table = ib_dev->cache.gid_cache; + u8 port; + + if (!table) + return; + + for (port = 0; port < ib_dev->phys_port_cnt; port++) + release_gid_table(table[port]); + + kfree(table); + ib_dev->cache.gid_cache = NULL; +} + +static void gid_table_cleanup_one(struct ib_device *ib_dev) +{ + struct ib_gid_table **table = ib_dev->cache.gid_cache; + u8 port; + + if (!table) + return; + + for (port = 0; port < ib_dev->phys_port_cnt; port++) + cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), + table[port]); +} + +static int gid_table_setup_one(struct ib_device *ib_dev) +{ + int err; + + err = _gid_table_setup_one(ib_dev); + + if (err) + return err; + + err = roce_rescan_device(ib_dev); + + if (err) { + gid_table_cleanup_one(ib_dev); + gid_table_release_one(ib_dev); + } + + return err; +} + +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid, + struct ib_gid_attr *gid_attr) +{ + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) + return -EINVAL; + + return __ib_cache_gid_get(device, port_num, index, gid, gid_attr); +} +EXPORT_SYMBOL(ib_get_cached_gid); + +int ib_find_cached_gid(struct ib_device *device, + const union ib_gid *gid, + struct net_device *ndev, + u8 *port_num, + u16 *index) +{ + return ib_cache_gid_find(device, gid, ndev, port_num, index); } EXPORT_SYMBOL(ib_find_cached_gid); +int ib_find_gid_by_filter(struct ib_device *device, + const union ib_gid *gid, + u8 port_num, + bool (*filter)(const union ib_gid *gid, + const struct ib_gid_attr *, + void *), + void *context, u16 *index) +{ + /* Only RoCE GID table supports filter function */ + if (!rdma_cap_roce_gid_table(device, port_num) && filter) + return -EPROTONOSUPPORT; + + return ib_cache_gid_find_by_filter(device, gid, + port_num, filter, + context, index); +} +EXPORT_SYMBOL(ib_find_gid_by_filter); + int ib_get_cached_pkey(struct ib_device *device, u8 port_num, int index, @@ -140,12 +772,12 @@ int ib_get_cached_pkey(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; if (index < 0 || index >= cache->table_len) ret = -EINVAL; @@ -169,12 +801,12 @@ int ib_find_cached_pkey(struct ib_device *device, int ret = -ENOENT; int partial_ix = -1; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; @@ -209,12 +841,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device, int i; int ret = -ENOENT; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; @@ -238,11 +870,11 @@ int ib_get_cached_lmc(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; read_unlock_irqrestore(&device->cache.lock, flags); return ret; @@ -254,9 +886,21 @@ static void ib_cache_update(struct ib_device *device, { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; - struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; + struct ib_gid_cache { + int table_len; + union ib_gid table[0]; + } *gid_cache = NULL; int i; int ret; + struct ib_gid_table *table; + struct ib_gid_table **ports_table = device->cache.gid_cache; + bool use_roce_gid_table = + rdma_cap_roce_gid_table(device, port); + + if (port < rdma_start_port(device) || port > rdma_end_port(device)) + return; + + table = ports_table[port - rdma_start_port(device)]; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) @@ -276,12 +920,14 @@ static void ib_cache_update(struct ib_device *device, pkey_cache->table_len = tprops->pkey_tbl_len; - gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len * - sizeof *gid_cache->table, GFP_KERNEL); - if (!gid_cache) - goto err; + if (!use_roce_gid_table) { + gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len * + sizeof(*gid_cache->table), GFP_KERNEL); + if (!gid_cache) + goto err; - gid_cache->table_len = tprops->gid_tbl_len; + gid_cache->table_len = tprops->gid_tbl_len; + } for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); @@ -292,29 +938,36 @@ static void ib_cache_update(struct ib_device *device, } } - for (i = 0; i < gid_cache->table_len; ++i) { - ret = ib_query_gid(device, port, i, gid_cache->table + i); - if (ret) { - printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", - ret, device->name, i); - goto err; + if (!use_roce_gid_table) { + for (i = 0; i < gid_cache->table_len; ++i) { + ret = ib_query_gid(device, port, i, + gid_cache->table + i, NULL); + if (ret) { + printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } } } write_lock_irq(&device->cache.lock); - old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; - old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; - device->cache.pkey_cache[port - start_port(device)] = pkey_cache; - device->cache.gid_cache [port - start_port(device)] = gid_cache; + device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; + if (!use_roce_gid_table) { + for (i = 0; i < gid_cache->table_len; i++) { + modify_gid(device, port, table, i, gid_cache->table + i, + &zattr, false); + } + } - device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; write_unlock_irq(&device->cache.lock); + kfree(gid_cache); kfree(old_pkey_cache); - kfree(old_gid_cache); kfree(tprops); return; @@ -355,85 +1008,88 @@ static void ib_cache_event(struct ib_event_handler *handler, } } -static void ib_cache_setup_one(struct ib_device *device) +int ib_cache_setup_one(struct ib_device *device) { int p; + int err; rwlock_init(&device->cache.lock); device->cache.pkey_cache = - kmalloc(sizeof *device->cache.pkey_cache * - (end_port(device) - start_port(device) + 1), GFP_KERNEL); - device->cache.gid_cache = - kmalloc(sizeof *device->cache.gid_cache * - (end_port(device) - start_port(device) + 1), GFP_KERNEL); - + kzalloc(sizeof *device->cache.pkey_cache * + (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * - (end_port(device) - - start_port(device) + 1), + (rdma_end_port(device) - + rdma_start_port(device) + 1), GFP_KERNEL); - - if (!device->cache.pkey_cache || !device->cache.gid_cache || + if (!device->cache.pkey_cache || !device->cache.lmc_cache) { printk(KERN_WARNING "Couldn't allocate cache " "for %s\n", device->name); - goto err; + return -ENOMEM; } - for (p = 0; p <= end_port(device) - start_port(device); ++p) { - device->cache.pkey_cache[p] = NULL; - device->cache.gid_cache [p] = NULL; - ib_cache_update(device, p + start_port(device)); - } + err = gid_table_setup_one(device); + if (err) + /* Allocated memory will be cleaned in the release function */ + return err; + + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) + ib_cache_update(device, p + rdma_start_port(device)); INIT_IB_EVENT_HANDLER(&device->cache.event_handler, device, ib_cache_event); - if (ib_register_event_handler(&device->cache.event_handler)) - goto err_cache; - - return; + err = ib_register_event_handler(&device->cache.event_handler); + if (err) + goto err; -err_cache: - for (p = 0; p <= end_port(device) - start_port(device); ++p) { - kfree(device->cache.pkey_cache[p]); - kfree(device->cache.gid_cache[p]); - } + return 0; err: - kfree(device->cache.pkey_cache); - kfree(device->cache.gid_cache); - kfree(device->cache.lmc_cache); + gid_table_cleanup_one(device); + return err; } -static void ib_cache_cleanup_one(struct ib_device *device) +void ib_cache_release_one(struct ib_device *device) { int p; - ib_unregister_event_handler(&device->cache.event_handler); - flush_workqueue(ib_wq); - - for (p = 0; p <= end_port(device) - start_port(device); ++p) { - kfree(device->cache.pkey_cache[p]); - kfree(device->cache.gid_cache[p]); - } - + /* + * The release function frees all the cache elements. + * This function should be called as part of freeing + * all the device's resources when the cache could no + * longer be accessed. + */ + if (device->cache.pkey_cache) + for (p = 0; + p <= rdma_end_port(device) - rdma_start_port(device); ++p) + kfree(device->cache.pkey_cache[p]); + + gid_table_release_one(device); kfree(device->cache.pkey_cache); - kfree(device->cache.gid_cache); kfree(device->cache.lmc_cache); } -static struct ib_client cache_client = { - .name = "cache", - .add = ib_cache_setup_one, - .remove = ib_cache_cleanup_one -}; +void ib_cache_cleanup_one(struct ib_device *device) +{ + /* The cleanup function unregisters the event handler, + * waits for all in-progress workqueue elements and cleans + * up the GID cache. This function should be called after + * the device was removed from the devices list and all + * clients were removed, so the cache exists but is + * non-functional and shouldn't be updated anymore. + */ + ib_unregister_event_handler(&device->cache.event_handler); + flush_workqueue(ib_wq); + gid_table_cleanup_one(device); +} -int __init ib_cache_setup(void) +void __init ib_cache_setup(void) { - return ib_register_client(&cache_client); + roce_gid_mgmt_init(); } void __exit ib_cache_cleanup(void) { - ib_unregister_client(&cache_client); + roce_gid_mgmt_cleanup(); } diff --git a/kernel/drivers/infiniband/core/cm.c b/kernel/drivers/infiniband/core/cm.c index 0271608a5..d6d2b3582 100644 --- a/kernel/drivers/infiniband/core/cm.c +++ b/kernel/drivers/infiniband/core/cm.c @@ -58,7 +58,7 @@ MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); static void cm_add_one(struct ib_device *device); -static void cm_remove_one(struct ib_device *device); +static void cm_remove_one(struct ib_device *device, void *client_data); static struct ib_client cm_client = { .name = "cm", @@ -169,6 +169,7 @@ struct cm_device { struct ib_device *ib_device; struct device *device; u8 ack_delay; + int going_down; struct cm_port *port[0]; }; @@ -178,8 +179,6 @@ struct cm_av { struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; - u8 valid; - u8 smac[ETH_ALEN]; }; struct cm_work { @@ -212,13 +211,15 @@ struct cm_id_private { spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; atomic_t refcount; + /* Number of clients sharing this ib_cm_id. Only valid for listeners. + * Protected by the cm.lock spinlock. */ + int listen_sharecount; struct ib_mad_send_buf *msg; struct cm_timewait_info *timewait_info; /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; - struct ib_cm_compare_data *compare_data; void *private_data; __be64 tid; @@ -267,7 +268,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC); + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); @@ -297,7 +299,8 @@ static int cm_alloc_response_msg(struct cm_port *port, m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC); + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); @@ -356,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) unsigned long flags; int ret; u8 p; + struct net_device *ndev = ib_get_ndev_from_path(path); read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - &p, NULL)) { + ndev, &p, NULL)) { port = cm_dev->port[p-1]; break; } } read_unlock_irqrestore(&cm.device_lock, flags); + if (ndev) + dev_put(ndev); + if (!port) return -EINVAL; @@ -379,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; - memcpy(av->smac, path->smac, sizeof(av->smac)); - av->valid = 1; return 0; } @@ -437,40 +442,6 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } -static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask) -{ - int i; - - for (i = 0; i < IB_CM_COMPARE_SIZE; i++) - dst[i] = src[i] & mask[i]; -} - -static int cm_compare_data(struct ib_cm_compare_data *src_data, - struct ib_cm_compare_data *dst_data) -{ - u32 src[IB_CM_COMPARE_SIZE]; - u32 dst[IB_CM_COMPARE_SIZE]; - - if (!src_data || !dst_data) - return 0; - - cm_mask_copy(src, src_data->data, dst_data->mask); - cm_mask_copy(dst, dst_data->data, src_data->mask); - return memcmp(src, dst, sizeof(src)); -} - -static int cm_compare_private_data(u32 *private_data, - struct ib_cm_compare_data *dst_data) -{ - u32 src[IB_CM_COMPARE_SIZE]; - - if (!dst_data) - return 0; - - cm_mask_copy(src, private_data, dst_data->mask); - return memcmp(src, dst_data->data, sizeof(src)); -} - /* * Trivial helpers to strip endian annotation and compare; the * endianness doesn't actually matter since we just need a stable @@ -503,18 +474,14 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; - int data_cmp; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); - data_cmp = cm_compare_data(cm_id_priv->compare_data, - cur_cm_id_priv->compare_data); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device) && - !data_cmp) + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) @@ -525,8 +492,6 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) link = &(*link)->rb_left; else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_right; - else if (data_cmp < 0) - link = &(*link)->rb_left; else link = &(*link)->rb_right; } @@ -536,20 +501,16 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) } static struct cm_id_private * cm_find_listen(struct ib_device *device, - __be64 service_id, - u32 *private_data) + __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; - int data_cmp; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); - data_cmp = cm_compare_private_data(private_data, - cm_id_priv->compare_data); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && - (cm_id_priv->id.device == device) && !data_cmp) + (cm_id_priv->id.device == device)) return cm_id_priv; if (device < cm_id_priv->id.device) @@ -560,8 +521,6 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; - else if (data_cmp < 0) - node = node->rb_left; else node = node->rb_right; } @@ -803,6 +762,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); + if (!cm_dev) + return; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); @@ -816,8 +780,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); - queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, - msecs_to_jiffies(wait_time)); + + /* Check if the device started its remove_one */ + spin_lock_irqsave(&cm.lock, flags); + if (!cm_dev->going_down) + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + spin_unlock_irqrestore(&cm.lock, flags); + cm_id_priv->timewait_info = NULL; } @@ -845,9 +815,15 @@ retest: spin_lock_irq(&cm_id_priv->lock); switch (cm_id->state) { case IB_CM_LISTEN: - cm_id->state = IB_CM_IDLE; spin_unlock_irq(&cm_id_priv->lock); + spin_lock_irq(&cm.lock); + if (--cm_id_priv->listen_sharecount > 0) { + /* The id is still shared. */ + cm_deref_id(cm_id_priv); + spin_unlock_irq(&cm.lock); + return; + } rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); spin_unlock_irq(&cm.lock); break; @@ -859,6 +835,11 @@ retest: case IB_CM_SIDR_REQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); + spin_lock_irq(&cm.lock); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) + rb_erase(&cm_id_priv->sidr_id_node, + &cm.remote_sidr_table); + spin_unlock_irq(&cm.lock); break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: @@ -916,7 +897,6 @@ retest: wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - kfree(cm_id_priv->compare_data); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } @@ -927,11 +907,23 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) } EXPORT_SYMBOL(ib_destroy_cm_id); -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, - struct ib_cm_compare_data *compare_data) +/** + * __ib_cm_listen - Initiates listening on the specified service ID for + * connection and service ID resolution requests. + * @cm_id: Connection identifier associated with the listen request. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller. + * @service_mask: Mask applied to service ID used to listen across a + * range of service IDs. If set to 0, the service ID is matched + * exactly. This parameter is ignored if %service_id is set to + * IB_CM_ASSIGN_SERVICE_ID. + */ +static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, + __be64 service_mask) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; - unsigned long flags; int ret = 0; service_mask = service_mask ? service_mask : ~cpu_to_be64(0); @@ -944,20 +936,9 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, if (cm_id->state != IB_CM_IDLE) return -EINVAL; - if (compare_data) { - cm_id_priv->compare_data = kzalloc(sizeof *compare_data, - GFP_KERNEL); - if (!cm_id_priv->compare_data) - return -ENOMEM; - cm_mask_copy(cm_id_priv->compare_data->data, - compare_data->data, compare_data->mask); - memcpy(cm_id_priv->compare_data->mask, compare_data->mask, - sizeof(compare_data->mask)); - } - cm_id->state = IB_CM_LISTEN; + ++cm_id_priv->listen_sharecount; - spin_lock_irqsave(&cm.lock, flags); if (service_id == IB_CM_ASSIGN_SERVICE_ID) { cm_id->service_id = cpu_to_be64(cm.listen_service_id++); cm_id->service_mask = ~cpu_to_be64(0); @@ -966,18 +947,95 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, cm_id->service_mask = service_mask; } cur_cm_id_priv = cm_insert_listen(cm_id_priv); - spin_unlock_irqrestore(&cm.lock, flags); if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; - kfree(cm_id_priv->compare_data); - cm_id_priv->compare_data = NULL; + --cm_id_priv->listen_sharecount; ret = -EBUSY; } return ret; } + +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm.lock, flags); + ret = __ib_cm_listen(cm_id, service_id, service_mask); + spin_unlock_irqrestore(&cm.lock, flags); + + return ret; +} EXPORT_SYMBOL(ib_cm_listen); +/** + * Create a new listening ib_cm_id and listen on the given service ID. + * + * If there's an existing ID listening on that same device and service ID, + * return it. + * + * @device: Device associated with the cm_id. All related communication will + * be associated with the specified device. + * @cm_handler: Callback invoked to notify the user of CM events. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller. + * + * Callers should call ib_destroy_cm_id when done with the listener ID. + */ +struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, + ib_cm_handler cm_handler, + __be64 service_id) +{ + struct cm_id_private *cm_id_priv; + struct ib_cm_id *cm_id; + unsigned long flags; + int err = 0; + + /* Create an ID in advance, since the creation may sleep */ + cm_id = ib_create_cm_id(device, cm_handler, NULL); + if (IS_ERR(cm_id)) + return cm_id; + + spin_lock_irqsave(&cm.lock, flags); + + if (service_id == IB_CM_ASSIGN_SERVICE_ID) + goto new_id; + + /* Find an existing ID */ + cm_id_priv = cm_find_listen(device, service_id); + if (cm_id_priv) { + if (cm_id->cm_handler != cm_handler || cm_id->context) { + /* Sharing an ib_cm_id with different handlers is not + * supported */ + spin_unlock_irqrestore(&cm.lock, flags); + return ERR_PTR(-EINVAL); + } + atomic_inc(&cm_id_priv->refcount); + ++cm_id_priv->listen_sharecount; + spin_unlock_irqrestore(&cm.lock, flags); + + ib_destroy_cm_id(cm_id); + cm_id = &cm_id_priv->id; + return cm_id; + } + +new_id: + /* Use newly created ID */ + err = __ib_cm_listen(cm_id, service_id, 0); + + spin_unlock_irqrestore(&cm.lock, flags); + + if (err) { + ib_destroy_cm_id(cm_id); + return ERR_PTR(err); + } + return cm_id; +} +EXPORT_SYMBOL(ib_cm_insert_listen); + static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, enum cm_msg_sequence msg_seq) { @@ -1254,6 +1312,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); + primary_path->service_id = req_msg->service_id; if (req_msg->alt_local_lid) { memset(alt_path, 0, sizeof *alt_path); @@ -1275,7 +1334,26 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); + alt_path->service_id = req_msg->service_id; + } +} + +static u16 cm_get_bth_pkey(struct cm_work *work) +{ + struct ib_device *ib_dev = work->port->cm_dev->ib_device; + u8 port_num = work->port->port_num; + u16 pkey_index = work->mad_recv_wc->wc->pkey_index; + u16 pkey; + int ret; + + ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey); + if (ret) { + dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n", + port_num, pkey_index, ret); + return 0; } + + return pkey; } static void cm_format_req_event(struct cm_work *work, @@ -1288,6 +1366,7 @@ static void cm_format_req_event(struct cm_work *work, req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; + param->bth_pkey = cm_get_bth_pkey(work); param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) @@ -1470,8 +1549,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id, - req_msg->private_data); + req_msg->service_id); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); @@ -1561,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); - work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, &work->path[0].sgid); + work->port->port_num, 0, &work->path[0].sgid, + NULL); ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); @@ -2978,6 +3056,8 @@ static void cm_format_sidr_req_event(struct cm_work *work, param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; + param->service_id = sidr_req_msg->service_id; + param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -3017,8 +3097,7 @@ static int cm_sidr_req_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id, - sidr_req_msg->private_data); + sidr_req_msg->service_id); if (!cur_cm_id_priv) { spin_unlock_irq(&cm.lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); @@ -3098,7 +3177,10 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm.lock, flags); - rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); + } spin_unlock_irqrestore(&cm.lock, flags); return 0; @@ -3303,6 +3385,11 @@ static int cm_establish(struct ib_cm_id *cm_id) struct cm_work *work; unsigned long flags; int ret = 0; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id->device, &cm_client); + if (!cm_dev) + return -ENODEV; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) @@ -3341,7 +3428,17 @@ static int cm_establish(struct ib_cm_id *cm_id) work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) { + queue_delayed_work(cm.wq, &work->work, 0); + } else { + kfree(work); + ret = -ENODEV; + } + spin_unlock_irq(&cm.lock); + out: return ret; } @@ -3392,6 +3489,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, enum ib_cm_event_type event; u16 attr_id; int paths = 0; + int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: @@ -3450,7 +3548,19 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!port->cm_dev->going_down) + queue_delayed_work(cm.wq, &work->work, 0); + else + going_down = 1; + spin_unlock_irq(&cm.lock); + + if (going_down) { + kfree(work); + ib_free_recv_mad(mad_recv_wc); + } } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, @@ -3508,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; - if (!cm_id_priv->av.valid) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return -EINVAL; - } - if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { - qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; - *qp_attr_mask |= IB_QP_VID; - } - if (!is_zero_ether_addr(cm_id_priv->av.smac)) { - memcpy(qp_attr->smac, cm_id_priv->av.smac, - sizeof(qp_attr->smac)); - *qp_attr_mask |= IB_QP_SMAC; - } - if (cm_id_priv->alt_av.valid) { - if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { - qp_attr->alt_vlan_id = - cm_id_priv->alt_av.ah_attr.vlan_id; - *qp_attr_mask |= IB_QP_ALT_VID; - } - if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { - memcpy(qp_attr->alt_smac, - cm_id_priv->alt_av.smac, - sizeof(qp_attr->alt_smac)); - *qp_attr_mask |= IB_QP_ALT_SMAC; - } - } qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); @@ -3759,11 +3843,9 @@ static void cm_add_one(struct ib_device *ib_device) }; unsigned long flags; int ret; + int count = 0; u8 i; - if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB) - return; - cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) @@ -3771,7 +3853,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->ib_device = ib_device; cm_get_ack_delay(cm_dev); - + cm_dev->going_down = 0; cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); @@ -3782,6 +3864,9 @@ static void cm_add_one(struct ib_device *ib_device) set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; @@ -3808,7 +3893,13 @@ static void cm_add_one(struct ib_device *ib_device) ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; + + count++; } + + if (!count) + goto free; + ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); @@ -3824,18 +3915,22 @@ error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } +free: device_unregister(cm_dev->device); kfree(cm_dev); } -static void cm_remove_one(struct ib_device *ib_device) +static void cm_remove_one(struct ib_device *ib_device, void *client_data) { - struct cm_device *cm_dev; + struct cm_device *cm_dev = client_data; struct cm_port *port; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP @@ -3843,7 +3938,6 @@ static void cm_remove_one(struct ib_device *ib_device) unsigned long flags; int i; - cm_dev = ib_get_client_data(ib_device, &cm_client); if (!cm_dev) return; @@ -3851,11 +3945,23 @@ static void cm_remove_one(struct ib_device *ib_device) list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); + spin_lock_irq(&cm.lock); + cm_dev->going_down = 1; + spin_unlock_irq(&cm.lock); + for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - ib_unregister_mad_agent(port->mad_agent); + /* + * We flush the queue here after the going_down set, this + * verify that no new works will be queued in the recv handler, + * after that we can call the unregister_mad_agent + */ flush_workqueue(cm.wq); + ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); diff --git a/kernel/drivers/infiniband/core/cma.c b/kernel/drivers/infiniband/core/cma.c index 38ffe0981..17a15c560 100644 --- a/kernel/drivers/infiniband/core/cma.c +++ b/kernel/drivers/infiniband/core/cma.c @@ -44,8 +44,12 @@ #include <linux/module.h> #include <net/route.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/tcp.h> #include <net/ipv6.h> +#include <net/ip_fib.h> +#include <net/ip6_route.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> @@ -65,8 +69,36 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 +static const char * const cma_events[] = { + [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", + [RDMA_CM_EVENT_ADDR_ERROR] = "address error", + [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", + [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", + [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", + [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", + [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", + [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", + [RDMA_CM_EVENT_REJECTED] = "rejected", + [RDMA_CM_EVENT_ESTABLISHED] = "established", + [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", + [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", + [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", + [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", + [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", + [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", +}; + +const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? + cma_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(rdma_event_msg); + static void cma_add_one(struct ib_device *device); -static void cma_remove_one(struct ib_device *device); +static void cma_remove_one(struct ib_device *device, void *client_data); static struct ib_client cma_client = { .name = "cma", @@ -80,10 +112,37 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static DEFINE_IDR(tcp_ps); -static DEFINE_IDR(udp_ps); -static DEFINE_IDR(ipoib_ps); -static DEFINE_IDR(ib_ps); +static int cma_pernet_id; + +struct cma_pernet { + struct idr tcp_ps; + struct idr udp_ps; + struct idr ipoib_ps; + struct idr ib_ps; +}; + +static struct cma_pernet *cma_pernet(struct net *net) +{ + return net_generic(net, cma_pernet_id); +} + +static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps) +{ + struct cma_pernet *pernet = cma_pernet(net); + + switch (ps) { + case RDMA_PS_TCP: + return &pernet->tcp_ps; + case RDMA_PS_UDP: + return &pernet->udp_ps; + case RDMA_PS_IPOIB: + return &pernet->ipoib_ps; + case RDMA_PS_IB: + return &pernet->ib_ps; + default: + return NULL; + } +} struct cma_device { struct list_head list; @@ -94,11 +153,34 @@ struct cma_device { }; struct rdma_bind_list { - struct idr *ps; + enum rdma_port_space ps; struct hlist_head owners; unsigned short port; }; +static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, + struct rdma_bind_list *bind_list, int snum) +{ + struct idr *idr = cma_pernet_idr(net, ps); + + return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); +} + +static struct rdma_bind_list *cma_ps_find(struct net *net, + enum rdma_port_space ps, int snum) +{ + struct idr *idr = cma_pernet_idr(net, ps); + + return idr_find(idr, snum); +} + +static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum) +{ + struct idr *idr = cma_pernet_idr(net, ps); + + idr_remove(idr, snum); +} + enum { CMA_OPTION_AFONLY, }; @@ -197,6 +279,15 @@ struct cma_hdr { #define CMA_VERSION 0x00 +struct cma_req_info { + struct ib_device *device; + int port; + union ib_gid local_gid; + __be64 service_id; + u16 pkey; + bool has_gid:1; +}; + static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; @@ -234,7 +325,7 @@ static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, return old; } -static inline u8 cma_get_ip_ver(struct cma_hdr *hdr) +static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) { return hdr->ip_version >> 4; } @@ -349,18 +440,40 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } +static inline int cma_validate_port(struct ib_device *device, u8 port, + union ib_gid *gid, int dev_type, + int bound_if_index) +{ + int ret = -ENODEV; + struct net_device *ndev = NULL; + + if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) + return ret; + + if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) + return ret; + + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) + ndev = dev_get_by_index(&init_net, bound_if_index); + + ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL); + + if (ndev) + dev_put(ndev); + + return ret; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid, iboe_gid; + union ib_gid gid, iboe_gid, *gidp; int ret = -ENODEV; - u8 port, found_port; - enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? - IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; + u8 port; - if (dev_ll != IB_LINK_LAYER_INFINIBAND && + if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; @@ -370,41 +483,38 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); - if (listen_id_priv && - rdma_port_get_link_layer(listen_id_priv->id.device, - listen_id_priv->id.port_num) == dev_ll) { + + if (listen_id_priv) { cma_dev = listen_id_priv->cma_dev; port = listen_id_priv->id.port_num; - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, - &found_port, NULL); - else - ret = ib_find_cached_gid(cma_dev->device, &gid, - &found_port, NULL); - - if (!ret && (port == found_port)) { - id_priv->id.port_num = found_port; + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; + + ret = cma_validate_port(cma_dev->device, port, gidp, + dev_addr->dev_type, + dev_addr->bound_dev_if); + if (!ret) { + id_priv->id.port_num = port; goto out; } } + list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { if (listen_id_priv && listen_id_priv->cma_dev == cma_dev && listen_id_priv->id.port_num == port) continue; - if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); - else - ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); - - if (!ret && (port == found_port)) { - id_priv->id.port_num = found_port; - goto out; - } + + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; + + ret = cma_validate_port(cma_dev->device, port, gidp, + dev_addr->dev_type, + dev_addr->bound_dev_if); + if (!ret) { + id_priv->id.port_num = port; + goto out; } } } @@ -435,14 +545,16 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) pkey = ntohs(addr->sib_pkey); list_for_each_entry(cur_dev, &dev_list, list) { - if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) - continue; - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!rdma_cap_af_ib(cur_dev->device, p)) + continue; + if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { + for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, + &gid, NULL); + i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -488,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, return 0; } -struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, +struct rdma_cm_id *rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { @@ -512,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); + id_priv->id.route.addr.dev_addr.net = get_net(net); return &id_priv->id; } @@ -629,19 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - qp_attr.ah_attr.grh.sgid_index, &sgid); + qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); if (ret) goto out; - if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) - == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) - == IB_LINK_LAYER_ETHERNET) { - ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); + BUG_ON(id_priv->cma_dev->device != id_priv->id.device); - if (ret) - goto out; - } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); @@ -700,11 +807,10 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, int ret; u16 pkey; - if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == - IB_LINK_LAYER_INFINIBAND) - pkey = ib_addr_get_pkey(dev_addr); - else + if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) pkey = 0xffff; + else + pkey = ib_addr_get_pkey(dev_addr); ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); @@ -735,8 +841,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else @@ -745,19 +850,15 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); - break; - default: + } else ret = -ENOSYS; - break; - } return ret; } @@ -837,107 +938,419 @@ static inline int cma_any_port(struct sockaddr *addr) return !cma_port(addr); } -static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, +static void cma_save_ib_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct rdma_cm_id *listen_id, struct ib_sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; - ib = (struct sockaddr_ib *) &id->route.addr.src_addr; - ib->sib_family = listen_ib->sib_family; - if (path) { - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->sgid, 16); - } else { - ib->sib_pkey = listen_ib->sib_pkey; - ib->sib_flowinfo = listen_ib->sib_flowinfo; - ib->sib_addr = listen_ib->sib_addr; + if (src_addr) { + ib = (struct sockaddr_ib *)src_addr; + ib->sib_family = AF_IB; + if (path) { + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->sgid, 16); + ib->sib_sid = path->service_id; + ib->sib_scope_id = 0; + } else { + ib->sib_pkey = listen_ib->sib_pkey; + ib->sib_flowinfo = listen_ib->sib_flowinfo; + ib->sib_addr = listen_ib->sib_addr; + ib->sib_sid = listen_ib->sib_sid; + ib->sib_scope_id = listen_ib->sib_scope_id; + } + ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); } - ib->sib_sid = listen_ib->sib_sid; - ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); - ib->sib_scope_id = listen_ib->sib_scope_id; - - if (path) { - ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; - ib->sib_family = listen_ib->sib_family; - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->dgid, 16); + if (dst_addr) { + ib = (struct sockaddr_ib *)dst_addr; + ib->sib_family = AF_IB; + if (path) { + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->dgid, 16); + } } } -static __be16 ss_get_port(const struct sockaddr_storage *ss) -{ - if (ss->ss_family == AF_INET) - return ((struct sockaddr_in *)ss)->sin_port; - else if (ss->ss_family == AF_INET6) - return ((struct sockaddr_in6 *)ss)->sin6_port; - BUG(); -} - -static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, - struct cma_hdr *hdr) +static void cma_save_ip4_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct cma_hdr *hdr, + __be16 local_port) { struct sockaddr_in *ip4; - ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; - ip4->sin_family = AF_INET; - ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; - ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr); + if (src_addr) { + ip4 = (struct sockaddr_in *)src_addr; + ip4->sin_family = AF_INET; + ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; + ip4->sin_port = local_port; + } - ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; - ip4->sin_family = AF_INET; - ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; - ip4->sin_port = hdr->port; + if (dst_addr) { + ip4 = (struct sockaddr_in *)dst_addr; + ip4->sin_family = AF_INET; + ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; + ip4->sin_port = hdr->port; + } } -static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, - struct cma_hdr *hdr) +static void cma_save_ip6_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct cma_hdr *hdr, + __be16 local_port) { struct sockaddr_in6 *ip6; - ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; - ip6->sin6_family = AF_INET6; - ip6->sin6_addr = hdr->dst_addr.ip6; - ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr); + if (src_addr) { + ip6 = (struct sockaddr_in6 *)src_addr; + ip6->sin6_family = AF_INET6; + ip6->sin6_addr = hdr->dst_addr.ip6; + ip6->sin6_port = local_port; + } - ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; - ip6->sin6_family = AF_INET6; - ip6->sin6_addr = hdr->src_addr.ip6; - ip6->sin6_port = hdr->port; + if (dst_addr) { + ip6 = (struct sockaddr_in6 *)dst_addr; + ip6->sin6_family = AF_INET6; + ip6->sin6_addr = hdr->src_addr.ip6; + ip6->sin6_port = hdr->port; + } } -static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event) +static u16 cma_port_from_service_id(__be64 service_id) { - struct cma_hdr *hdr; + return (u16)be64_to_cpu(service_id); +} - if (listen_id->route.addr.src_addr.ss_family == AF_IB) { - if (ib_event->event == IB_CM_REQ_RECEIVED) - cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); - else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) - cma_save_ib_info(id, listen_id, NULL); - return 0; - } +static int cma_save_ip_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct ib_cm_event *ib_event, + __be64 service_id) +{ + struct cma_hdr *hdr; + __be16 port; hdr = ib_event->private_data; if (hdr->cma_version != CMA_VERSION) return -EINVAL; + port = htons(cma_port_from_service_id(service_id)); + switch (cma_get_ip_ver(hdr)) { case 4: - cma_save_ip4_info(id, listen_id, hdr); + cma_save_ip4_info(src_addr, dst_addr, hdr, port); break; case 6: - cma_save_ip6_info(id, listen_id, hdr); + cma_save_ip6_info(src_addr, dst_addr, hdr, port); + break; + default: + return -EAFNOSUPPORT; + } + + return 0; +} + +static int cma_save_net_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event, + sa_family_t sa_family, __be64 service_id) +{ + if (sa_family == AF_IB) { + if (ib_event->event == IB_CM_REQ_RECEIVED) + cma_save_ib_info(src_addr, dst_addr, listen_id, + ib_event->param.req_rcvd.primary_path); + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); + return 0; + } + + return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); +} + +static int cma_save_req_info(const struct ib_cm_event *ib_event, + struct cma_req_info *req) +{ + const struct ib_cm_req_event_param *req_param = + &ib_event->param.req_rcvd; + const struct ib_cm_sidr_req_event_param *sidr_param = + &ib_event->param.sidr_req_rcvd; + + switch (ib_event->event) { + case IB_CM_REQ_RECEIVED: + req->device = req_param->listen_id->device; + req->port = req_param->port; + memcpy(&req->local_gid, &req_param->primary_path->sgid, + sizeof(req->local_gid)); + req->has_gid = true; + req->service_id = req_param->primary_path->service_id; + req->pkey = be16_to_cpu(req_param->primary_path->pkey); + break; + case IB_CM_SIDR_REQ_RECEIVED: + req->device = sidr_param->listen_id->device; + req->port = sidr_param->port; + req->has_gid = false; + req->service_id = sidr_param->service_id; + req->pkey = sidr_param->pkey; break; default: return -EINVAL; } + return 0; } +static bool validate_ipv4_net_dev(struct net_device *net_dev, + const struct sockaddr_in *dst_addr, + const struct sockaddr_in *src_addr) +{ + __be32 daddr = dst_addr->sin_addr.s_addr, + saddr = src_addr->sin_addr.s_addr; + struct fib_result res; + struct flowi4 fl4; + int err; + bool ret; + + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || + ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || + ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || + ipv4_is_loopback(saddr)) + return false; + + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_iif = net_dev->ifindex; + fl4.daddr = daddr; + fl4.saddr = saddr; + + rcu_read_lock(); + err = fib_lookup(dev_net(net_dev), &fl4, &res, 0); + ret = err == 0 && FIB_RES_DEV(res) == net_dev; + rcu_read_unlock(); + + return ret; +} + +static bool validate_ipv6_net_dev(struct net_device *net_dev, + const struct sockaddr_in6 *dst_addr, + const struct sockaddr_in6 *src_addr) +{ +#if IS_ENABLED(CONFIG_IPV6) + const int strict = ipv6_addr_type(&dst_addr->sin6_addr) & + IPV6_ADDR_LINKLOCAL; + struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, + &src_addr->sin6_addr, net_dev->ifindex, + strict); + bool ret; + + if (!rt) + return false; + + ret = rt->rt6i_idev->dev == net_dev; + ip6_rt_put(rt); + + return ret; +#else + return false; +#endif +} + +static bool validate_net_dev(struct net_device *net_dev, + const struct sockaddr *daddr, + const struct sockaddr *saddr) +{ + const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; + const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; + const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; + const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; + + switch (daddr->sa_family) { + case AF_INET: + return saddr->sa_family == AF_INET && + validate_ipv4_net_dev(net_dev, daddr4, saddr4); + + case AF_INET6: + return saddr->sa_family == AF_INET6 && + validate_ipv6_net_dev(net_dev, daddr6, saddr6); + + default: + return false; + } +} + +static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, + const struct cma_req_info *req) +{ + struct sockaddr_storage listen_addr_storage, src_addr_storage; + struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, + *src_addr = (struct sockaddr *)&src_addr_storage; + struct net_device *net_dev; + const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; + int err; + + err = cma_save_ip_info(listen_addr, src_addr, ib_event, + req->service_id); + if (err) + return ERR_PTR(err); + + net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, + gid, listen_addr); + if (!net_dev) + return ERR_PTR(-ENODEV); + + if (!validate_net_dev(net_dev, listen_addr, src_addr)) { + dev_put(net_dev); + return ERR_PTR(-EHOSTUNREACH); + } + + return net_dev; +} + +static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) +{ + return (be64_to_cpu(service_id) >> 16) & 0xffff; +} + +static bool cma_match_private_data(struct rdma_id_private *id_priv, + const struct cma_hdr *hdr) +{ + struct sockaddr *addr = cma_src_addr(id_priv); + __be32 ip4_addr; + struct in6_addr ip6_addr; + + if (cma_any_addr(addr) && !id_priv->afonly) + return true; + + switch (addr->sa_family) { + case AF_INET: + ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; + if (cma_get_ip_ver(hdr) != 4) + return false; + if (!cma_any_addr(addr) && + hdr->dst_addr.ip4.addr != ip4_addr) + return false; + break; + case AF_INET6: + ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; + if (cma_get_ip_ver(hdr) != 6) + return false; + if (!cma_any_addr(addr) && + memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) + return false; + break; + case AF_IB: + return true; + default: + return false; + } + + return true; +} + +static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) +{ + enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); + enum rdma_transport_type transport = + rdma_node_get_transport(device->node_type); + + return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; +} + +static bool cma_protocol_roce(const struct rdma_cm_id *id) +{ + struct ib_device *device = id->device; + const int port_num = id->port_num ?: rdma_start_port(device); + + return cma_protocol_roce_dev_port(device, port_num); +} + +static bool cma_match_net_dev(const struct rdma_cm_id *id, + const struct net_device *net_dev, + u8 port_num) +{ + const struct rdma_addr *addr = &id->route.addr; + + if (!net_dev) + /* This request is an AF_IB request or a RoCE request */ + return (!id->port_num || id->port_num == port_num) && + (addr->src_addr.ss_family == AF_IB || + cma_protocol_roce_dev_port(id->device, port_num)); + + return !addr->dev_addr.bound_dev_if || + (net_eq(dev_net(net_dev), addr->dev_addr.net) && + addr->dev_addr.bound_dev_if == net_dev->ifindex); +} + +static struct rdma_id_private *cma_find_listener( + const struct rdma_bind_list *bind_list, + const struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event, + const struct cma_req_info *req, + const struct net_device *net_dev) +{ + struct rdma_id_private *id_priv, *id_priv_dev; + + if (!bind_list) + return ERR_PTR(-EINVAL); + + hlist_for_each_entry(id_priv, &bind_list->owners, node) { + if (cma_match_private_data(id_priv, ib_event->private_data)) { + if (id_priv->id.device == cm_id->device && + cma_match_net_dev(&id_priv->id, net_dev, req->port)) + return id_priv; + list_for_each_entry(id_priv_dev, + &id_priv->listen_list, + listen_list) { + if (id_priv_dev->id.device == cm_id->device && + cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) + return id_priv_dev; + } + } + } + + return ERR_PTR(-EINVAL); +} + +static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, + struct ib_cm_event *ib_event, + struct net_device **net_dev) +{ + struct cma_req_info req; + struct rdma_bind_list *bind_list; + struct rdma_id_private *id_priv; + int err; + + err = cma_save_req_info(ib_event, &req); + if (err) + return ERR_PTR(err); + + *net_dev = cma_get_net_dev(ib_event, &req); + if (IS_ERR(*net_dev)) { + if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { + /* Assuming the protocol is AF_IB */ + *net_dev = NULL; + } else if (cma_protocol_roce_dev_port(req.device, req.port)) { + /* TODO find the net dev matching the request parameters + * through the RoCE GID table */ + *net_dev = NULL; + } else { + return ERR_CAST(*net_dev); + } + } + + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, + rdma_ps_from_service_id(req.service_id), + cma_port_from_service_id(req.service_id)); + id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); + if (IS_ERR(id_priv) && *net_dev) { + dev_put(*net_dev); + *net_dev = NULL; + } + + return id_priv; +} + static inline int cma_user_data_offset(struct rdma_id_private *id_priv) { return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); @@ -945,13 +1358,9 @@ static inline int cma_user_data_offset(struct rdma_id_private *id_priv) static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { - case IB_LINK_LAYER_INFINIBAND: + if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); - break; - default: - break; } } @@ -1002,6 +1411,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; + struct net *net = id_priv->id.route.addr.dev_addr.net; if (!bind_list) return; @@ -1009,7 +1419,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { - idr_remove(bind_list->ps, bind_list->port); + cma_ps_remove(net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); @@ -1023,17 +1433,12 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); - switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { - case IB_LINK_LAYER_INFINIBAND: + if (rdma_cap_ib_mcast(id_priv->cma_dev->device, + id_priv->id.port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); - break; - case IB_LINK_LAYER_ETHERNET: + } else kref_put(&mc->mcref, release_mc); - break; - default: - break; - } } } @@ -1054,17 +1459,12 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); - break; - default: - break; } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); @@ -1078,6 +1478,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); @@ -1197,20 +1598,27 @@ out: } static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event) + struct ib_cm_event *ib_event, + struct net_device *net_dev) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; + const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + const __be64 service_id = + ib_event->param.req_rcvd.primary_path->service_id; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; id_priv = container_of(id, struct rdma_id_private, id); - if (cma_save_net_info(id, listen_id, ib_event)) + if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, + (struct sockaddr *)&id->route.addr.dst_addr, + listen_id, ib_event, ss_family, service_id)) goto err; rt = &id->route; @@ -1224,14 +1632,21 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - if (cma_any_addr(cma_src_addr(id_priv))) { - rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; - rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); - ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - } else { - ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); + if (net_dev) { + ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); if (ret) goto err; + } else { + if (!cma_protocol_roce(listen_id) && + cma_any_addr(cma_src_addr(id_priv))) { + rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); + } else if (!cma_any_addr(cma_src_addr(id_priv))) { + ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); + if (ret) + goto err; + } } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); @@ -1244,25 +1659,38 @@ err: } static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event) + struct ib_cm_event *ib_event, + struct net_device *net_dev) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; + const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + struct net *net = listen_id->route.addr.dev_addr.net; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(net, listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; id_priv = container_of(id, struct rdma_id_private, id); - if (cma_save_net_info(id, listen_id, ib_event)) + if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, + (struct sockaddr *)&id->route.addr.dst_addr, + listen_id, ib_event, ss_family, + ib_event->param.sidr_req_rcvd.service_id)) goto err; - if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { - ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr); + if (net_dev) { + ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); if (ret) goto err; + } else { + if (!cma_any_addr(cma_src_addr(id_priv))) { + ret = cma_translate_addr(cma_src_addr(id_priv), + &id->route.addr.dev_addr); + if (ret) + goto err; + } } id_priv->state = RDMA_CM_CONNECT; @@ -1300,25 +1728,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; + struct net_device *net_dev; int offset, ret; - listen_id = cm_id->context; - if (!cma_check_req_qp_type(&listen_id->id, ib_event)) - return -EINVAL; + listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); + if (IS_ERR(listen_id)) + return PTR_ERR(listen_id); - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) - return -ECONNABORTED; + if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { + ret = -EINVAL; + goto net_dev_put; + } + + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) { + ret = -ECONNABORTED; + goto net_dev_put; + } memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { - conn_id = cma_new_udp_id(&listen_id->id, ib_event); + conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - conn_id = cma_new_conn_id(&listen_id->id, ib_event); + conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } @@ -1356,6 +1792,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); + if (net_dev) + dev_put(net_dev); return 0; err3: @@ -1369,6 +1807,11 @@ err1: mutex_unlock(&listen_id->handler_mutex); if (conn_id) rdma_destroy_id(&conn_id->id); + +net_dev_put: + if (net_dev) + dev_put(net_dev); + return ret; } @@ -1381,42 +1824,6 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) } EXPORT_SYMBOL(rdma_get_service_id); -static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, - struct ib_cm_compare_data *compare) -{ - struct cma_hdr *cma_data, *cma_mask; - __be32 ip4_addr; - struct in6_addr ip6_addr; - - memset(compare, 0, sizeof *compare); - cma_data = (void *) compare->data; - cma_mask = (void *) compare->mask; - - switch (addr->sa_family) { - case AF_INET: - ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; - cma_set_ip_ver(cma_data, 4); - cma_set_ip_ver(cma_mask, 0xF); - if (!cma_any_addr(addr)) { - cma_data->dst_addr.ip4.addr = ip4_addr; - cma_mask->dst_addr.ip4.addr = htonl(~0); - } - break; - case AF_INET6: - ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; - cma_set_ip_ver(cma_data, 6); - cma_set_ip_ver(cma_mask, 0xF); - if (!cma_any_addr(addr)) { - cma_data->dst_addr.ip6 = ip6_addr; - memset(&cma_mask->dst_addr.ip6, 0xFF, - sizeof cma_mask->dst_addr.ip6); - } - break; - default: - break; - } -} - static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; @@ -1498,7 +1905,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.event_handler, + new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { @@ -1570,33 +1978,18 @@ out: static int cma_ib_listen(struct rdma_id_private *id_priv) { - struct ib_cm_compare_data compare_data; struct sockaddr *addr; struct ib_cm_id *id; __be64 svc_id; - int ret; - id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); + addr = cma_src_addr(id_priv); + svc_id = rdma_get_service_id(&id_priv->id, addr); + id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); if (IS_ERR(id)) return PTR_ERR(id); - id_priv->cm_id.ib = id; - addr = cma_src_addr(id_priv); - svc_id = rdma_get_service_id(&id_priv->id, addr); - if (cma_any_addr(addr) && !id_priv->afonly) - ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); - else { - cma_set_compare_data(id_priv->id.ps, addr, &compare_data); - ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); - } - - if (ret) { - ib_destroy_cm_id(id_priv->cm_id.ib); - id_priv->cm_id.ib = NULL; - } - - return ret; + return 0; } static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) @@ -1610,6 +2003,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) if (IS_ERR(id)) return PTR_ERR(id); + id->tos = id_priv->tos; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), @@ -1640,13 +2034,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; + struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; - if (cma_family(id_priv) == AF_IB && - rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB) + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, + id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; @@ -1925,16 +2319,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; - if (addr->dev_addr.bound_dev_if) + if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); + route->path_rec->net = &init_net; + route->path_rec->ifindex = addr->dev_addr.bound_dev_if; + } if (!ndev) { ret = -ENODEV; goto err2; } - route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); - memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); @@ -1984,26 +2379,15 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return -EINVAL; atomic_inc(&id_priv->refcount); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ret = cma_resolve_ib_route(id_priv, timeout_ms); - break; - case IB_LINK_LAYER_ETHERNET: - ret = cma_resolve_iboe_route(id_priv); - break; - default: - ret = -ENOSYS; - } - break; - case RDMA_TRANSPORT_IWARP: + if (rdma_cap_ib_sa(id->device, id->port_num)) + ret = cma_resolve_ib_route(id_priv, timeout_ms); + else if (rdma_protocol_roce(id->device, id->port_num)) + ret = cma_resolve_iboe_route(id_priv); + else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv, timeout_ms); - break; - default: + else ret = -ENOSYS; - break; - } + if (ret) goto err; @@ -2045,7 +2429,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { if (cma_family(id_priv) == AF_IB && - rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + !rdma_cap_ib_cm(cur_dev->device, 1)) continue; if (!cma_dev) @@ -2068,7 +2452,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); + ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); if (ret) goto out; @@ -2077,7 +2461,7 @@ port_found: goto out; id_priv->id.route.addr.dev_addr.dev_type = - (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? + (rdma_protocol_ib(cma_dev->device, p)) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); @@ -2195,8 +2579,11 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; if (dst_addr->sa_family == AF_INET6) { - ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = - ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; + struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; + struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; } else if (dst_addr->sa_family == AF_IB) { ((struct sockaddr_ib *) src_addr)->sib_pkey = ((struct sockaddr_ib *) dst_addr)->sib_pkey; @@ -2317,8 +2704,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, hlist_add_head(&id_priv->node, &bind_list->owners); } -static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, - unsigned short snum) +static int cma_alloc_port(enum rdma_port_space ps, + struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; int ret; @@ -2327,7 +2714,8 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, if (!bind_list) return -ENOMEM; - ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL); + ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, + snum); if (ret < 0) goto err; @@ -2340,18 +2728,20 @@ err: return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; } -static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) +static int cma_alloc_any_port(enum rdma_port_space ps, + struct rdma_id_private *id_priv) { static unsigned int last_used_port; int low, high, remaining; unsigned int rover; + struct net *net = id_priv->id.route.addr.dev_addr.net; - inet_get_local_port_range(&init_net, &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; retry: if (last_used_port != rover && - !idr_find(ps, (unsigned short) rover)) { + !cma_ps_find(net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid @@ -2406,7 +2796,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list, return 0; } -static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) +static int cma_use_port(enum rdma_port_space ps, + struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; unsigned short snum; @@ -2416,7 +2807,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - bind_list = idr_find(ps, snum); + bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { @@ -2439,25 +2830,24 @@ static int cma_bind_listen(struct rdma_id_private *id_priv) return ret; } -static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv) +static enum rdma_port_space cma_select_inet_ps( + struct rdma_id_private *id_priv) { switch (id_priv->id.ps) { case RDMA_PS_TCP: - return &tcp_ps; case RDMA_PS_UDP: - return &udp_ps; case RDMA_PS_IPOIB: - return &ipoib_ps; case RDMA_PS_IB: - return &ib_ps; + return id_priv->id.ps; default: - return NULL; + + return 0; } } -static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv) +static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) { - struct idr *ps = NULL; + enum rdma_port_space ps = 0; struct sockaddr_ib *sib; u64 sid_ps, mask, sid; @@ -2467,15 +2857,15 @@ static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv) if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { sid_ps = RDMA_IB_IP_PS_IB; - ps = &ib_ps; + ps = RDMA_PS_IB; } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && (sid == (RDMA_IB_IP_PS_TCP & mask))) { sid_ps = RDMA_IB_IP_PS_TCP; - ps = &tcp_ps; + ps = RDMA_PS_TCP; } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && (sid == (RDMA_IB_IP_PS_UDP & mask))) { sid_ps = RDMA_IB_IP_PS_UDP; - ps = &udp_ps; + ps = RDMA_PS_UDP; } if (ps) { @@ -2488,7 +2878,7 @@ static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv) static int cma_get_port(struct rdma_id_private *id_priv) { - struct idr *ps; + enum rdma_port_space ps; int ret; if (cma_family(id_priv) != AF_IB) @@ -2554,18 +2944,15 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv->backlog = backlog; if (id->device) { - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) goto err; - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, 1)) { ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; - break; - default: + } else { ret = -ENOSYS; goto err; } @@ -2612,8 +2999,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (addr->sa_family == AF_INET) id_priv->afonly = 1; #if IS_ENABLED(CONFIG_IPV6) - else if (addr->sa_family == AF_INET6) - id_priv->afonly = init_net.ipv6.sysctl.bindv6only; + else if (addr->sa_family == AF_INET6) { + struct net *net = id_priv->id.route.addr.dev_addr.net; + + id_priv->afonly = net->ipv6.sysctl.bindv6only; + } #endif } ret = cma_get_port(id_priv); @@ -2857,6 +3247,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (IS_ERR(cm_id)) return PTR_ERR(cm_id); + cm_id->tos = id_priv->tos; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), @@ -2901,20 +3292,15 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_connect_iw(id_priv, conn_param); - break; - default: + else ret = -ENOSYS; - break; - } if (ret) goto err; @@ -3017,8 +3403,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, @@ -3034,14 +3419,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) else ret = cma_rep_recv(id_priv); } - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_accept_iw(id_priv, conn_param); - break; - default: + else ret = -ENOSYS; - break; - } if (ret) goto reject; @@ -3085,8 +3466,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, if (!id_priv->cm_id.ib) return -EINVAL; - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); @@ -3094,15 +3474,12 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); - break; - default: + } else ret = -ENOSYS; - break; - } + return ret; } EXPORT_SYMBOL(rdma_reject); @@ -3116,22 +3493,18 @@ int rdma_disconnect(struct rdma_cm_id *id) if (!id_priv->cm_id.ib) return -EINVAL; - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); - break; - default: + } else ret = -EINVAL; - break; - } + out: return ret; } @@ -3377,24 +3750,13 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ret = cma_join_ib_multicast(id_priv, mc); - break; - case IB_LINK_LAYER_ETHERNET: - kref_init(&mc->mcref); - ret = cma_iboe_join_multicast(id_priv, mc); - break; - default: - ret = -EINVAL; - } - break; - default: + if (rdma_protocol_roce(id->device, id->port_num)) { + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + ret = cma_join_ib_multicast(id_priv, mc); + else ret = -ENOSYS; - break; - } if (ret) { spin_lock_irq(&id_priv->lock); @@ -3422,19 +3784,15 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); - if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - break; - case IB_LINK_LAYER_ETHERNET: - kref_put(&mc->mcref, release_mc); - break; - default: - break; - } - } + + BUG_ON(id_priv->cma_dev->device != id->device); + + if (rdma_cap_ib_mcast(id->device, id->port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + } else if (rdma_protocol_roce(id->device, id->port_num)) + kref_put(&mc->mcref, release_mc); + return; } } @@ -3450,6 +3808,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; if ((dev_addr->bound_dev_if == ndev->ifindex) && + (net_eq(dev_net(ndev), dev_addr->net)) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); @@ -3475,9 +3834,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; - if (dev_net(ndev) != &init_net) - return NOTIFY_DONE; - if (event != NETDEV_BONDING_FAILOVER) return NOTIFY_DONE; @@ -3578,11 +3934,10 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } -static void cma_remove_one(struct ib_device *device) +static void cma_remove_one(struct ib_device *device, void *client_data) { - struct cma_device *cma_dev; + struct cma_device *cma_dev = client_data; - cma_dev = ib_get_client_data(device, &cma_client); if (!cma_dev) return; @@ -3673,6 +4028,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = { .module = THIS_MODULE }, }; +static int cma_init_net(struct net *net) +{ + struct cma_pernet *pernet = cma_pernet(net); + + idr_init(&pernet->tcp_ps); + idr_init(&pernet->udp_ps); + idr_init(&pernet->ipoib_ps); + idr_init(&pernet->ib_ps); + + return 0; +} + +static void cma_exit_net(struct net *net) +{ + struct cma_pernet *pernet = cma_pernet(net); + + idr_destroy(&pernet->tcp_ps); + idr_destroy(&pernet->udp_ps); + idr_destroy(&pernet->ipoib_ps); + idr_destroy(&pernet->ib_ps); +} + +static struct pernet_operations cma_pernet_operations = { + .init = cma_init_net, + .exit = cma_exit_net, + .id = &cma_pernet_id, + .size = sizeof(struct cma_pernet), +}; + static int __init cma_init(void) { int ret; @@ -3681,6 +4065,10 @@ static int __init cma_init(void) if (!cma_wq) return -ENOMEM; + ret = register_pernet_subsys(&cma_pernet_operations); + if (ret) + goto err_wq; + ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); @@ -3698,6 +4086,7 @@ err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); +err_wq: destroy_workqueue(cma_wq); return ret; } @@ -3709,11 +4098,8 @@ static void __exit cma_cleanup(void) unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); destroy_workqueue(cma_wq); - idr_destroy(&tcp_ps); - idr_destroy(&udp_ps); - idr_destroy(&ipoib_ps); - idr_destroy(&ib_ps); } module_init(cma_init); diff --git a/kernel/drivers/infiniband/core/core_priv.h b/kernel/drivers/infiniband/core/core_priv.h index 87d1936f5..5cf6eb716 100644 --- a/kernel/drivers/infiniband/core/core_priv.h +++ b/kernel/drivers/infiniband/core/core_priv.h @@ -43,12 +43,53 @@ int ib_device_register_sysfs(struct ib_device *device, u8, struct kobject *)); void ib_device_unregister_sysfs(struct ib_device *device); -int ib_sysfs_setup(void); -void ib_sysfs_cleanup(void); - -int ib_cache_setup(void); +void ib_cache_setup(void); void ib_cache_cleanup(void); -int ib_resolve_eth_l2_attrs(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask); +int ib_resolve_eth_dmac(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask); + +typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, + struct net_device *idev, void *cookie); + +typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port, + struct net_device *idev, void *cookie); + +void ib_enum_roce_netdev(struct ib_device *ib_dev, + roce_netdev_filter filter, + void *filter_cookie, + roce_netdev_callback cb, + void *cookie); +void ib_enum_all_roce_netdevs(roce_netdev_filter filter, + void *filter_cookie, + roce_netdev_callback cb, + void *cookie); + +enum ib_cache_gid_default_mode { + IB_CACHE_GID_DEFAULT_MODE_SET, + IB_CACHE_GID_DEFAULT_MODE_DELETE +}; + +void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, + struct net_device *ndev, + enum ib_cache_gid_default_mode mode); + +int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr); + +int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr); + +int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, + struct net_device *ndev); + +int roce_gid_mgmt_init(void); +void roce_gid_mgmt_cleanup(void); + +int roce_rescan_device(struct ib_device *ib_dev); + +int ib_cache_setup_one(struct ib_device *device); +void ib_cache_cleanup_one(struct ib_device *device); +void ib_cache_release_one(struct ib_device *device); + #endif /* _CORE_PRIV_H */ diff --git a/kernel/drivers/infiniband/core/device.c b/kernel/drivers/infiniband/core/device.c index 18c1ece76..179e8134d 100644 --- a/kernel/drivers/infiniband/core/device.c +++ b/kernel/drivers/infiniband/core/device.c @@ -38,7 +38,10 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/netdevice.h> #include <rdma/rdma_netlink.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include "core_priv.h" @@ -50,22 +53,34 @@ struct ib_client_data { struct list_head list; struct ib_client *client; void * data; + /* The device or client is going down. Do not call client or device + * callbacks other than remove(). */ + bool going_down; }; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); +/* The device_list and client_list contain devices and clients after their + * registration has completed, and the devices and clients are removed + * during unregistration. */ static LIST_HEAD(device_list); static LIST_HEAD(client_list); /* - * device_mutex protects access to both device_list and client_list. - * There's no real point to using multiple locks or something fancier - * like an rwsem: we always access both lists, and we're always - * modifying one list or the other list. In any case this is not a - * hot path so there's no point in trying to optimize. + * device_mutex and lists_rwsem protect access to both device_list and + * client_list. device_mutex protects writer access by device and client + * registration / de-registration. lists_rwsem protects reader access to + * these lists. Iterators of these lists must lock it for read, while updates + * to the lists must be done with a write lock. A special case is when the + * device_mutex is locked. In this case locking the lists for read access is + * not necessary as the device_mutex implies it. + * + * lists_rwsem also protects access to the client data list. */ static DEFINE_MUTEX(device_mutex); +static DECLARE_RWSEM(lists_rwsem); + static int ib_device_check_mandatory(struct ib_device *device) { @@ -92,7 +107,8 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), - IB_MANDATORY_FUNC(dereg_mr) + IB_MANDATORY_FUNC(dereg_mr), + IB_MANDATORY_FUNC(get_port_immutable) }; int i; @@ -151,18 +167,36 @@ static int alloc_name(char *name) return 0; } -static int start_port(struct ib_device *device) +static void ib_device_release(struct device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; -} + struct ib_device *dev = container_of(device, struct ib_device, dev); + ib_cache_release_one(dev); + kfree(dev->port_immutable); + kfree(dev); +} -static int end_port(struct ib_device *device) +static int ib_device_uevent(struct device *device, + struct kobj_uevent_env *env) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; + struct ib_device *dev = container_of(device, struct ib_device, dev); + + if (add_uevent_var(env, "NAME=%s", dev->name)) + return -ENOMEM; + + /* + * It would be nice to pass the node GUID with the event... + */ + + return 0; } +static struct class ib_class = { + .name = "infiniband", + .dev_release = ib_device_release, + .dev_uevent = ib_device_uevent, +}; + /** * ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -175,9 +209,27 @@ static int end_port(struct ib_device *device) */ struct ib_device *ib_alloc_device(size_t size) { - BUG_ON(size < sizeof (struct ib_device)); + struct ib_device *device; + + if (WARN_ON(size < sizeof(struct ib_device))) + return NULL; + + device = kzalloc(size, GFP_KERNEL); + if (!device) + return NULL; + + device->dev.class = &ib_class; + device_initialize(&device->dev); + + dev_set_drvdata(&device->dev, device); + + INIT_LIST_HEAD(&device->event_handler_list); + spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->client_data_lock); + INIT_LIST_HEAD(&device->client_data_list); + INIT_LIST_HEAD(&device->port_list); - return kzalloc(size, GFP_KERNEL); + return device; } EXPORT_SYMBOL(ib_alloc_device); @@ -189,13 +241,8 @@ EXPORT_SYMBOL(ib_alloc_device); */ void ib_dealloc_device(struct ib_device *device) { - if (device->reg_state == IB_DEV_UNINITIALIZED) { - kfree(device); - return; - } - - BUG_ON(device->reg_state != IB_DEV_UNREGISTERED); - + WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && + device->reg_state != IB_DEV_UNINITIALIZED); kobject_put(&device->dev.kobj); } EXPORT_SYMBOL(ib_dealloc_device); @@ -214,51 +261,53 @@ static int add_client_context(struct ib_device *device, struct ib_client *client context->client = client; context->data = NULL; + context->going_down = false; + down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_add(&context->list, &device->client_data_list); spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); return 0; } -static int read_port_table_lengths(struct ib_device *device) +static int verify_immutable(const struct ib_device *dev, u8 port) { - struct ib_port_attr *tprops = NULL; - int num_ports, ret = -ENOMEM; - u8 port_index; - - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); - if (!tprops) - goto out; - - num_ports = end_port(device) - start_port(device) + 1; + return WARN_ON(!rdma_cap_ib_mad(dev, port) && + rdma_max_mad_size(dev, port) != 0); +} - device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports, - GFP_KERNEL); - device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports, - GFP_KERNEL); - if (!device->pkey_tbl_len || !device->gid_tbl_len) - goto err; +static int read_port_immutable(struct ib_device *device) +{ + int ret; + u8 start_port = rdma_start_port(device); + u8 end_port = rdma_end_port(device); + u8 port; + + /** + * device->port_immutable is indexed directly by the port number to make + * access to this data as efficient as possible. + * + * Therefore port_immutable is declared as a 1 based array with + * potential empty slots at the beginning. + */ + device->port_immutable = kzalloc(sizeof(*device->port_immutable) + * (end_port + 1), + GFP_KERNEL); + if (!device->port_immutable) + return -ENOMEM; - for (port_index = 0; port_index < num_ports; ++port_index) { - ret = ib_query_port(device, port_index + start_port(device), - tprops); + for (port = start_port; port <= end_port; ++port) { + ret = device->get_port_immutable(device, port, + &device->port_immutable[port]); if (ret) - goto err; - device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; - device->gid_tbl_len[port_index] = tprops->gid_tbl_len; - } - - ret = 0; - goto out; + return ret; -err: - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); -out: - kfree(tprops); - return ret; + if (verify_immutable(device, port)) + return -EINVAL; + } + return 0; } /** @@ -275,6 +324,7 @@ int ib_register_device(struct ib_device *device, u8, struct kobject *)) { int ret; + struct ib_client *client; mutex_lock(&device_mutex); @@ -289,40 +339,37 @@ int ib_register_device(struct ib_device *device, goto out; } - INIT_LIST_HEAD(&device->event_handler_list); - INIT_LIST_HEAD(&device->client_data_list); - spin_lock_init(&device->event_handler_lock); - spin_lock_init(&device->client_data_lock); - - ret = read_port_table_lengths(device); + ret = read_port_immutable(device); if (ret) { - printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n", + printk(KERN_WARNING "Couldn't create per port immutable data %s\n", device->name); goto out; } + ret = ib_cache_setup_one(device); + if (ret) { + printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); + goto out; + } + ret = ib_device_register_sysfs(device, port_callback); if (ret) { printk(KERN_WARNING "Couldn't register device %s with driver model\n", device->name); - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); + ib_cache_cleanup_one(device); goto out; } - list_add_tail(&device->core_list, &device_list); - device->reg_state = IB_DEV_REGISTERED; - { - struct ib_client *client; - - list_for_each_entry(client, &client_list, list) - if (client->add && !add_client_context(device, client)) - client->add(device); - } + list_for_each_entry(client, &client_list, list) + if (client->add && !add_client_context(device, client)) + client->add(device); - out: + down_write(&lists_rwsem); + list_add_tail(&device->core_list, &device_list); + up_write(&lists_rwsem); +out: mutex_unlock(&device_mutex); return ret; } @@ -336,29 +383,37 @@ EXPORT_SYMBOL(ib_register_device); */ void ib_unregister_device(struct ib_device *device) { - struct ib_client *client; struct ib_client_data *context, *tmp; unsigned long flags; mutex_lock(&device_mutex); - list_for_each_entry_reverse(client, &client_list, list) - if (client->remove) - client->remove(device); - + down_write(&lists_rwsem); list_del(&device->core_list); + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + context->going_down = true; + spin_unlock_irqrestore(&device->client_data_lock, flags); + downgrade_write(&lists_rwsem); - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); + list_for_each_entry_safe(context, tmp, &device->client_data_list, + list) { + if (context->client->remove) + context->client->remove(device, context->data); + } + up_read(&lists_rwsem); mutex_unlock(&device_mutex); ib_device_unregister_sysfs(device); + ib_cache_cleanup_one(device); + down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) kfree(context); spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); device->reg_state = IB_DEV_UNREGISTERED; } @@ -383,11 +438,14 @@ int ib_register_client(struct ib_client *client) mutex_lock(&device_mutex); - list_add_tail(&client->list, &client_list); list_for_each_entry(device, &device_list, core_list) if (client->add && !add_client_context(device, client)) client->add(device); + down_write(&lists_rwsem); + list_add_tail(&client->list, &client_list); + up_write(&lists_rwsem); + mutex_unlock(&device_mutex); return 0; @@ -410,19 +468,41 @@ void ib_unregister_client(struct ib_client *client) mutex_lock(&device_mutex); + down_write(&lists_rwsem); + list_del(&client->list); + up_write(&lists_rwsem); + list_for_each_entry(device, &device_list, core_list) { - if (client->remove) - client->remove(device); + struct ib_client_data *found_context = NULL; + down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) if (context->client == client) { - list_del(&context->list); - kfree(context); + context->going_down = true; + found_context = context; + break; } spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); + + if (client->remove) + client->remove(device, found_context ? + found_context->data : NULL); + + if (!found_context) { + pr_warn("No client context found for %s/%s\n", + device->name, client->name); + continue; + } + + down_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_del(&found_context->list); + kfree(found_context); + spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); } - list_del(&client->list); mutex_unlock(&device_mutex); } @@ -558,7 +638,11 @@ EXPORT_SYMBOL(ib_dispatch_event); int ib_query_device(struct ib_device *device, struct ib_device_attr *device_attr) { - return device->query_device(device, device_attr); + struct ib_udata uhw = {.outlen = 0, .inlen = 0}; + + memset(device_attr, 0, sizeof(*device_attr)); + + return device->query_device(device, device_attr, &uhw); } EXPORT_SYMBOL(ib_query_device); @@ -575,7 +659,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->query_port(device, port_num, port_attr); @@ -588,17 +672,92 @@ EXPORT_SYMBOL(ib_query_port); * @port_num:Port number to query * @index:GID table index to query * @gid:Returned GID + * @attr: Returned GID attributes related to this GID index (only in RoCE). + * NULL means ignore. * * ib_query_gid() fetches the specified GID table entry. */ int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid) + u8 port_num, int index, union ib_gid *gid, + struct ib_gid_attr *attr) { + if (rdma_cap_roce_gid_table(device, port_num)) + return ib_get_cached_gid(device, port_num, index, gid, attr); + + if (attr) + return -EINVAL; + return device->query_gid(device, port_num, index, gid); } EXPORT_SYMBOL(ib_query_gid); /** + * ib_enum_roce_netdev - enumerate all RoCE ports + * @ib_dev : IB device we want to query + * @filter: Should we call the callback? + * @filter_cookie: Cookie passed to filter + * @cb: Callback to call for each found RoCE ports + * @cookie: Cookie passed back to the callback + * + * Enumerates all of the physical RoCE ports of ib_dev + * which are related to netdevice and calls callback() on each + * device for which filter() function returns non zero. + */ +void ib_enum_roce_netdev(struct ib_device *ib_dev, + roce_netdev_filter filter, + void *filter_cookie, + roce_netdev_callback cb, + void *cookie) +{ + u8 port; + + for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev); + port++) + if (rdma_protocol_roce(ib_dev, port)) { + struct net_device *idev = NULL; + + if (ib_dev->get_netdev) + idev = ib_dev->get_netdev(ib_dev, port); + + if (idev && + idev->reg_state >= NETREG_UNREGISTERED) { + dev_put(idev); + idev = NULL; + } + + if (filter(ib_dev, port, idev, filter_cookie)) + cb(ib_dev, port, idev, cookie); + + if (idev) + dev_put(idev); + } +} + +/** + * ib_enum_all_roce_netdevs - enumerate all RoCE devices + * @filter: Should we call the callback? + * @filter_cookie: Cookie passed to filter + * @cb: Callback to call for each found RoCE ports + * @cookie: Cookie passed back to the callback + * + * Enumerates all RoCE devices' physical ports which are related + * to netdevices and calls callback() on each device for which + * filter() function returns non zero. + */ +void ib_enum_all_roce_netdevs(roce_netdev_filter filter, + void *filter_cookie, + roce_netdev_callback cb, + void *cookie) +{ + struct ib_device *dev; + + down_read(&lists_rwsem); + list_for_each_entry(dev, &device_list, core_list) + ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); + up_read(&lists_rwsem); +} + +/** * ib_query_pkey - Get P_Key table entry * @device:Device to query * @port_num:Port number to query @@ -653,7 +812,7 @@ int ib_modify_port(struct ib_device *device, if (!device->modify_port) return -ENOSYS; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, @@ -666,19 +825,28 @@ EXPORT_SYMBOL(ib_modify_port); * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @ndev: The ndev related to the GID to search for. * @port_num: The port number of the device where the GID value was found. * @index: The index into the GID table where the GID was found. This * parameter may be NULL. */ int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index) + struct net_device *ndev, u8 *port_num, u16 *index) { union ib_gid tmp_gid; int ret, port, i; - for (port = start_port(device); port <= end_port(device); ++port) { - for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid); + for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { + if (rdma_cap_roce_gid_table(device, port)) { + if (!ib_find_cached_gid_by_port(device, gid, port, + ndev, index)) { + *port_num = port; + return 0; + } + } + + for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { + ret = ib_query_gid(device, port, i, &tmp_gid, NULL); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { @@ -709,7 +877,7 @@ int ib_find_pkey(struct ib_device *device, u16 tmp_pkey; int partial_ix = -1; - for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { + for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; @@ -733,6 +901,51 @@ int ib_find_pkey(struct ib_device *device, } EXPORT_SYMBOL(ib_find_pkey); +/** + * ib_get_net_dev_by_params() - Return the appropriate net_dev + * for a received CM request + * @dev: An RDMA device on which the request has been received. + * @port: Port number on the RDMA device. + * @pkey: The Pkey the request came on. + * @gid: A GID that the net_dev uses to communicate. + * @addr: Contains the IP address that the request specified as its + * destination. + */ +struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, + u8 port, + u16 pkey, + const union ib_gid *gid, + const struct sockaddr *addr) +{ + struct net_device *net_dev = NULL; + struct ib_client_data *context; + + if (!rdma_protocol_ib(dev, port)) + return NULL; + + down_read(&lists_rwsem); + + list_for_each_entry(context, &dev->client_data_list, list) { + struct ib_client *client = context->client; + + if (context->going_down) + continue; + + if (client->get_net_dev_by_params) { + net_dev = client->get_net_dev_by_params(dev, port, pkey, + gid, addr, + context->data); + if (net_dev) + break; + } + } + + up_read(&lists_rwsem); + + return net_dev; +} +EXPORT_SYMBOL(ib_get_net_dev_by_params); + static int __init ib_core_init(void) { int ret; @@ -741,7 +954,7 @@ static int __init ib_core_init(void) if (!ib_wq) return -ENOMEM; - ret = ib_sysfs_setup(); + ret = class_register(&ib_class); if (ret) { printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); goto err; @@ -753,19 +966,12 @@ static int __init ib_core_init(void) goto err_sysfs; } - ret = ib_cache_setup(); - if (ret) { - printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); - goto err_nl; - } + ib_cache_setup(); return 0; -err_nl: - ibnl_cleanup(); - err_sysfs: - ib_sysfs_cleanup(); + class_unregister(&ib_class); err: destroy_workqueue(ib_wq); @@ -776,7 +982,7 @@ static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); ibnl_cleanup(); - ib_sysfs_cleanup(); + class_unregister(&ib_class); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); } diff --git a/kernel/drivers/infiniband/core/iwpm_msg.c b/kernel/drivers/infiniband/core/iwpm_msg.c index e6ffa2e66..22a3abee2 100644 --- a/kernel/drivers/infiniband/core/iwpm_msg.c +++ b/kernel/drivers/infiniband/core/iwpm_msg.c @@ -67,7 +67,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto pid_query_error; } - if (iwpm_registered_client(nl_client)) + if (iwpm_check_registration(nl_client, IWPM_REG_VALID) || + iwpm_user_pid == IWPM_PID_UNAVAILABLE) return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); if (!skb) { @@ -106,7 +107,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ - iwpm_set_registered(nl_client, 1); iwpm_user_pid = IWPM_PID_UNAVAILABLE; err_str = "Unable to send a nlmsg"; goto pid_query_error; @@ -144,12 +144,12 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto add_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto add_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client); if (!skb) { err_str = "Unable to create a nlmsg"; @@ -214,12 +214,12 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto query_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto query_mapping_error; } - if (!iwpm_valid_pid()) - return 0; ret = -ENOMEM; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); if (!skb) { @@ -288,12 +288,12 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) err_str = "Invalid port mapper client"; goto remove_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) { err_str = "Unregistered port mapper client"; goto remove_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); if (!skb) { ret = -ENOMEM; @@ -388,7 +388,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", __func__, iwpm_user_pid); if (iwpm_valid_client(nl_client)) - iwpm_set_registered(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_VALID); register_pid_response_exit: nlmsg_request->request_done = 1; /* always for found nlmsg_request */ @@ -644,7 +644,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; const char *msg_type = "Mapping Info response"; - int iwpm_pid; u8 nl_client; char *iwpm_name; u16 iwpm_version; @@ -669,14 +668,14 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) __func__, nl_client); return ret; } - iwpm_set_registered(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_INCOMPL); atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + iwpm_user_pid = cb->nlh->nlmsg_pid; if (!iwpm_mapinfo_available()) return 0; - iwpm_pid = cb->nlh->nlmsg_pid; pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", - __func__, iwpm_pid); - ret = iwpm_send_mapinfo(nl_client, iwpm_pid); + __func__, iwpm_user_pid); + ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid); return ret; } EXPORT_SYMBOL(iwpm_mapping_info_cb); diff --git a/kernel/drivers/infiniband/core/iwpm_util.c b/kernel/drivers/infiniband/core/iwpm_util.c index a626795bf..5fb089e91 100644 --- a/kernel/drivers/infiniband/core/iwpm_util.c +++ b/kernel/drivers/infiniband/core/iwpm_util.c @@ -78,6 +78,7 @@ init_exit: mutex_unlock(&iwpm_admin_lock); if (!ret) { iwpm_set_valid(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__); } @@ -106,6 +107,7 @@ int iwpm_exit(u8 nl_client) } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); return 0; } EXPORT_SYMBOL(iwpm_exit); @@ -397,17 +399,23 @@ void iwpm_set_valid(u8 nl_client, int valid) } /* valid client */ -int iwpm_registered_client(u8 nl_client) +u32 iwpm_get_registration(u8 nl_client) { return iwpm_admin.reg_list[nl_client]; } /* valid client */ -void iwpm_set_registered(u8 nl_client, int reg) +void iwpm_set_registration(u8 nl_client, u32 reg) { iwpm_admin.reg_list[nl_client] = reg; } +/* valid client */ +u32 iwpm_check_registration(u8 nl_client, u32 reg) +{ + return (iwpm_get_registration(nl_client) & reg); +} + int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr) { diff --git a/kernel/drivers/infiniband/core/iwpm_util.h b/kernel/drivers/infiniband/core/iwpm_util.h index ee2d9ff09..b7b9e194c 100644 --- a/kernel/drivers/infiniband/core/iwpm_util.h +++ b/kernel/drivers/infiniband/core/iwpm_util.h @@ -58,6 +58,10 @@ #define IWPM_PID_UNDEFINED -1 #define IWPM_PID_UNAVAILABLE -2 +#define IWPM_REG_UNDEF 0x01 +#define IWPM_REG_VALID 0x02 +#define IWPM_REG_INCOMPL 0x04 + struct iwpm_nlmsg_request { struct list_head inprocess_list; __u32 nlmsg_seq; @@ -88,7 +92,7 @@ struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; int client_list[RDMA_NL_NUM_CLIENTS]; - int reg_list[RDMA_NL_NUM_CLIENTS]; + u32 reg_list[RDMA_NL_NUM_CLIENTS]; }; /** @@ -159,19 +163,31 @@ int iwpm_valid_client(u8 nl_client); void iwpm_set_valid(u8 nl_client, int valid); /** - * iwpm_registered_client - Check if the port mapper client is registered + * iwpm_check_registration - Check if the client registration + * matches the given one * @nl_client: The index of the netlink client + * @reg: The given registration type to compare with * * Call iwpm_register_pid() to register a client + * Returns true if the client registration matches reg, + * otherwise returns false + */ +u32 iwpm_check_registration(u8 nl_client, u32 reg); + +/** + * iwpm_set_registration - Set the client registration + * @nl_client: The index of the netlink client + * @reg: Registration type to set */ -int iwpm_registered_client(u8 nl_client); +void iwpm_set_registration(u8 nl_client, u32 reg); /** - * iwpm_set_registered - Set the port mapper client to registered or not + * iwpm_get_registration * @nl_client: The index of the netlink client - * @reg: 1 if registered or 0 if not + * + * Returns the client registration type */ -void iwpm_set_registered(u8 nl_client, int reg); +u32 iwpm_get_registration(u8 nl_client); /** * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of diff --git a/kernel/drivers/infiniband/core/mad.c b/kernel/drivers/infiniband/core/mad.c index 74c30f4c5..2281de122 100644 --- a/kernel/drivers/infiniband/core/mad.c +++ b/kernel/drivers/infiniband/core/mad.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -44,6 +45,7 @@ #include "mad_priv.h" #include "mad_rmpp.h" #include "smi.h" +#include "opa_smi.h" #include "agent.h" MODULE_LICENSE("Dual BSD/GPL"); @@ -59,8 +61,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -static struct kmem_cache *ib_mad_cache; - static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -73,7 +73,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, - struct ib_mad *mad); + const struct ib_mad_hdr *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); @@ -179,12 +179,12 @@ static int is_vendor_method_in_use( return 0; } -int ib_response_mad(struct ib_mad *mad) +int ib_response_mad(const struct ib_mad_hdr *hdr) { - return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) || - (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) && - (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP))); + return ((hdr->method & IB_MGMT_METHOD_RESP) || + (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && + (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); } EXPORT_SYMBOL(ib_response_mad); @@ -338,13 +338,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error1; } - mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(mad_agent_priv->agent.mr)) { - ret = ERR_PTR(-ENOMEM); - goto error2; - } - if (mad_reg_req) { reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); if (!reg_req) { @@ -429,8 +422,6 @@ error4: spin_unlock_irqrestore(&port_priv->reg_lock, flags); kfree(reg_req); error3: - ib_dereg_mr(mad_agent_priv->agent.mr); -error2: kfree(mad_agent_priv); error1: return ret; @@ -590,7 +581,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) wait_for_completion(&mad_agent_priv->comp); kfree(mad_agent_priv->reg_req); - ib_dereg_mr(mad_agent_priv->agent.mr); kfree(mad_agent_priv); } @@ -717,6 +707,32 @@ static void build_smp_wc(struct ib_qp *qp, wc->port_num = port_num; } +static size_t mad_priv_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_mad_private) + mp->mad_size; +} + +static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) +{ + size_t size = sizeof(struct ib_mad_private) + mad_size; + struct ib_mad_private *ret = kzalloc(size, flags); + + if (ret) + ret->mad_size = mad_size; + + return ret; +} + +static size_t port_mad_size(const struct ib_mad_port_private *port_priv) +{ + return rdma_max_mad_size(port_priv->device, port_priv->port_num); +} + +static size_t mad_priv_dma_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_grh) + mp->mad_size; +} + /* * Return 0 if SMP is to be sent * Return 1 if SMP was consumed locally (whether or not solicited) @@ -727,6 +743,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, { int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; + struct opa_smp *opa_smp = (struct opa_smp *)smp; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -735,11 +752,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num; struct ib_wc mad_wc; - struct ib_send_wr *send_wr = &mad_send_wr->send_wr; - - if (device->node_type == RDMA_NODE_IB_SWITCH && + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; + size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); + u16 out_mad_pkey_index = 0; + u16 drslid; + bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + + if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - port_num = send_wr->wr.ud.port_num; + port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; @@ -749,19 +771,49 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ - if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == - IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == - IB_SMI_DISCARD) { - ret = -EINVAL; - dev_err(&device->dev, "Invalid directed route\n"); - goto out; - } + if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { + u32 opa_drslid; + + if ((opa_get_smp_direction(opa_smp) + ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == + OPA_LID_PERMISSIVE && + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), + port_num) == IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid directed route\n"); + goto out; + } + opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && + opa_drslid & 0xffff0000) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", + opa_drslid); + goto out; + } + drslid = (u16)(opa_drslid & 0x0000ffff); - /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && - smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) - goto out; + /* Check to post send on QP or process locally */ + if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && + opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) + goto out; + } else { + if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == + IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "Invalid directed route\n"); + goto out; + } + drslid = be16_to_cpu(smp->dr_slid); + + /* Check to post send on QP or process locally */ + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) + goto out; + } local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { @@ -771,7 +823,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } local->mad_priv = NULL; local->recv_mad_agent = NULL; - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC); + mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; dev_err(&device->dev, "No memory for local response MAD\n"); @@ -780,18 +832,25 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, be16_to_cpu(smp->dr_slid), - send_wr->wr.ud.pkey_index, - send_wr->wr.ud.port_num, &mad_wc); + send_wr->wr.wr_id, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); + + if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { + mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + + mad_send_wr->send_buf.data_len + + sizeof(struct ib_grh); + } /* No GRH for DR SMP */ ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (struct ib_mad *)smp, - (struct ib_mad *)&mad_priv->mad); + (const struct ib_mad_hdr *)smp, mad_size, + (struct ib_mad_hdr *)mad_priv->mad, + &mad_size, &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (ib_response_mad(&mad_priv->mad.mad) && + if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -801,39 +860,43 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, */ atomic_inc(&mad_agent_priv->refcount); } else - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); + memcpy(mad_priv->mad, smp, mad_priv->mad_size); recv_mad_agent = find_mad_agent(port_priv, - &mad_priv->mad.mad); + (const struct ib_mad_hdr *)mad_priv->mad); } if (!port_priv || !recv_mad_agent) { /* * No receiving agent so drop packet and * generate send completion. */ - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; break; default: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); kfree(local); ret = -EINVAL; goto out; } local->mad_send_wr = mad_send_wr; + if (opa) { + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; + local->return_wc_byte_len = mad_size; + } /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -847,11 +910,11 @@ out: return ret; } -static int get_pad_size(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len, size_t mad_size) { int seg_size, pad; - seg_size = sizeof(struct ib_mad) - hdr_len; + seg_size = mad_size - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; return pad == seg_size ? 0 : pad; @@ -870,14 +933,15 @@ static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) } static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, - gfp_t gfp_mask) + size_t mad_size, gfp_t gfp_mask) { struct ib_mad_send_buf *send_buf = &send_wr->send_buf; struct ib_rmpp_mad *rmpp_mad = send_buf->mad; struct ib_rmpp_segment *seg = NULL; int left, seg_size, pad; - send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + send_buf->seg_size = mad_size - send_buf->hdr_len; + send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; seg_size = send_buf->seg_size; pad = send_wr->pad; @@ -910,7 +974,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, return 0; } -int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent) +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) { return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); } @@ -920,26 +984,37 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, - gfp_t gfp_mask) + gfp_t gfp_mask, + u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; int pad, message_size, ret, size; void *buf; + size_t mad_size; + bool opa; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - pad = get_pad_size(hdr_len, data_len); + + opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); + + if (opa && base_version == OPA_MGMT_BASE_VERSION) + mad_size = sizeof(struct opa_mad); + else + mad_size = sizeof(struct ib_mad); + + pad = get_pad_size(hdr_len, data_len, mad_size); message_size = hdr_len + data_len + pad; if (ib_mad_kernel_rmpp_agent(mad_agent)) { - if (!rmpp_active && message_size > sizeof(struct ib_mad)) + if (!rmpp_active && message_size > mad_size) return ERR_PTR(-EINVAL); } else - if (rmpp_active || message_size > sizeof(struct ib_mad)) + if (rmpp_active || message_size > mad_size) return ERR_PTR(-EINVAL); - size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + size = rmpp_active ? hdr_len : mad_size; buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); @@ -953,21 +1028,28 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->mad_agent_priv = mad_agent_priv; mad_send_wr->sg_list[0].length = hdr_len; - mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; - mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; - mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; - - mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->send_wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; - mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; + mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; + + /* OPA MADs don't have to be the full 2048 bytes */ + if (opa && base_version == OPA_MGMT_BASE_VERSION && + data_len < mad_size - hdr_len) + mad_send_wr->sg_list[1].length = data_len; + else + mad_send_wr->sg_list[1].length = mad_size - hdr_len; + + mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; + + mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { - ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); if (ret) { kfree(buf); return ERR_PTR(ret); @@ -1069,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_send_wr->mad_agent_priv->qp_info; - mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; + mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; mad_agent = mad_send_wr->send_buf.mad_agent; @@ -1097,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); list = &qp_info->send_queue.list; } else { @@ -1162,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, * request associated with the completion */ next_send_buf = send_buf->next; - mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; + mad_send_wr->send_wr.ah = send_buf->ah; if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { @@ -1237,7 +1319,7 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); - kmem_cache_free(ib_mad_cache, priv); + kfree(priv); } } EXPORT_SYMBOL(ib_free_recv_mad); @@ -1324,7 +1406,7 @@ static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) } static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, - char *oui) + const char *oui) { int i; @@ -1622,13 +1704,13 @@ out: static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, - struct ib_mad *mad) + const struct ib_mad_hdr *mad_hdr) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - if (ib_response_mad(mad)) { + if (ib_response_mad(mad_hdr)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1636,7 +1718,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * Routing is based on high 32 bits of transaction ID * of MAD. */ - hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; + hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; @@ -1648,45 +1730,45 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; - struct ib_vendor_mad *vendor_mad; + const struct ib_vendor_mad *vendor_mad; int index; /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI */ - if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) + if (mad_hdr->class_version >= MAX_MGMT_VERSION) goto out; - if (!is_vendor_class(mad->mad_hdr.mgmt_class)) { + if (!is_vendor_class(mad_hdr->mgmt_class)) { class = port_priv->version[ - mad->mad_hdr.class_version].class; + mad_hdr->class_version].class; if (!class) goto out; - if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + if (convert_mgmt_class(mad_hdr->mgmt_class) >= IB_MGMT_MAX_METHODS) goto out; method = class->method_table[convert_mgmt_class( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (method) - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } else { vendor = port_priv->version[ - mad->mad_hdr.class_version].vendor; + mad_hdr->class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[vendor_class_index( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (!vendor_class) goto out; /* Find matching OUI */ - vendor_mad = (struct ib_vendor_mad *)mad; + vendor_mad = (const struct ib_vendor_mad *)mad_hdr; index = find_vendor_oui(vendor_class, vendor_mad->oui); if (index == -1) goto out; method = vendor_class->method_table[index]; if (method) { - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } } @@ -1708,23 +1790,32 @@ out: return mad_agent; } -static int validate_mad(struct ib_mad *mad, u32 qp_num) +static int validate_mad(const struct ib_mad_hdr *mad_hdr, + const struct ib_mad_qp_info *qp_info, + bool opa) { int valid = 0; + u32 qp_num = qp_info->qp->qp_num; /* Make sure MAD base version is understood */ - if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { - pr_err("MAD received with unsupported base version %d\n", - mad->mad_hdr.base_version); + if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && + (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { + pr_err("MAD received with unsupported base version %d %s\n", + mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } /* Filter SMI packets sent to other than QP0 */ - if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || - (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { if (qp_num == 0) valid = 1; } else { + /* CM attributes other than ClassPortInfo only use Send method */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && + (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && + (mad_hdr->method != IB_MGMT_METHOD_SEND)) + goto out; /* Filter GSI packets sent to QP0 */ if (qp_num != 0) valid = 1; @@ -1734,8 +1825,8 @@ out: return valid; } -static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_hdr *mad_hdr) +static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_hdr *mad_hdr) { struct ib_rmpp_mad *rmpp_mad; @@ -1747,16 +1838,16 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } -static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc) +static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) { - return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class == + return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == rwc->recv_buf.mad->mad_hdr.mgmt_class; } -static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc ) +static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc ) { struct ib_ah_attr attr; u8 send_resp, rcv_resp; @@ -1765,8 +1856,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; - send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); - rcv_resp = ib_response_mad(rwc->recv_buf.mad); + send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); + rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ @@ -1791,7 +1882,7 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, ((1 << lmc) - 1))); } else { if (ib_get_cached_gid(device, port_num, - attr.grh.sgid_index, &sgid)) + attr.grh.sgid_index, &sgid, NULL)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); @@ -1811,22 +1902,22 @@ static inline int is_direct(u8 class) } struct ib_mad_send_wr_private* -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *wc) +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *wc) { struct ib_mad_send_wr_private *wr; - struct ib_mad *mad; + const struct ib_mad_hdr *mad_hdr; - mad = (struct ib_mad *)wc->recv_buf.mad; + mad_hdr = &wc->recv_buf.mad->mad_hdr; list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { - if ((wr->tid == mad->mad_hdr.tid) && + if ((wr->tid == mad_hdr->tid) && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } @@ -1836,15 +1927,15 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, * been notified that the send has completed */ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, wr->send_buf.mad) && - wr->tid == mad->mad_hdr.tid && + if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad_hdr->tid && wr->timeout && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ return (wr->status == IB_WC_SUCCESS) ? wr : NULL; @@ -1879,7 +1970,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } /* Complete corresponding request */ - if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { + if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { @@ -1924,26 +2015,163 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } } -static bool generate_unmatched_resp(struct ib_mad_private *recv, - struct ib_mad_private *response) +static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, + const struct ib_mad_qp_info *qp_info, + const struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) { - if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || - recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { - memcpy(response, recv, sizeof *response); + enum smi_forward_action retsmi; + struct ib_smp *smp = (struct ib_smp *)recv->mad; + + if (smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; - response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - response->mad.mad.mad_hdr.status = - cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); - if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(smp), + qp_info->qp->qp_num, + response->mad_size, + false); + + return IB_SMI_DISCARD; + } + return IB_SMI_HANDLE; +} + +static bool generate_unmatched_resp(const struct ib_mad_private *recv, + struct ib_mad_private *response, + size_t *resp_len, bool opa) +{ + const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; + struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; + + if (recv_hdr->method == IB_MGMT_METHOD_GET || + recv_hdr->method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + resp_hdr->method = IB_MGMT_METHOD_GET_RESP; + resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + resp_hdr->status |= IB_SMP_DIRECTION; + + if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { + if (recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED || + recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + *resp_len = opa_get_smp_header_size( + (struct opa_smp *)recv->mad); + else + *resp_len = sizeof(struct ib_mad_hdr); + } return true; } else { return false; } } + +static enum smi_action +handle_opa_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct opa_smp *smp = (struct opa_smp *)recv->mad; + + if (opa_smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = opa_smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (opa_smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (opa_smi_check_local_smp(smp, port_priv->device) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.opa_mad = + (struct opa_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + opa_smi_get_fwd_port(smp), + qp_info->qp->qp_num, + recv->header.wc.byte_len, + true); + + return IB_SMI_DISCARD; + } + + return IB_SMI_HANDLE; +} + +static enum smi_action +handle_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response, + bool opa) +{ + struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; + + if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && + mad_hdr->class_version == OPA_SMI_CLASS_VERSION) + return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, + response); + + return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); +} + static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { @@ -1954,109 +2182,97 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent; int port_num; int ret = IB_MAD_RESULT_SUCCESS; + size_t mad_size; + u16 resp_mad_pkey_index = 0; + bool opa; mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; dequeue_mad(mad_list); + opa = rdma_cap_opa_mad(qp_info->port_priv->device, + qp_info->port_priv->port_num); + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); ib_dma_unmap_single(port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; recv->header.recv_wc.wc = &recv->header.wc; - recv->header.recv_wc.mad_len = sizeof(struct ib_mad); - recv->header.recv_wc.recv_buf.mad = &recv->mad.mad; + + if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { + recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); + recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; if (atomic_read(&qp_info->snoop_count)) snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); /* Validate MAD */ - if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) + if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; - response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + mad_size = recv->mad_size; + response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) { dev_err(&port_priv->device->dev, "ib_mad_recv_done_handler no memory for response buffer\n"); goto out; } - if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; - if (recv->mad.mad.mad_hdr.mgmt_class == + if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - enum smi_forward_action retsmi; - - if (smi_handle_dr_smp_recv(&recv->mad.smp, - port_priv->device->node_type, - port_num, - port_priv->device->phys_port_cnt) == - IB_SMI_DISCARD) + if (handle_smi(port_priv, qp_info, wc, port_num, recv, + response, opa) + == IB_SMI_DISCARD) goto out; - - retsmi = smi_check_forward_dr_smp(&recv->mad.smp); - if (retsmi == IB_SMI_LOCAL) - goto local; - - if (retsmi == IB_SMI_SEND) { /* don't forward */ - if (smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_num) == IB_SMI_DISCARD) - goto out; - - if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) - goto out; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { - /* forward case for switches */ - memcpy(response, recv, sizeof(*response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - - agent_send_response(&response->mad.mad, - &response->grh, wc, - port_priv->device, - smi_get_fwd_port(&recv->mad.smp), - qp_info->qp->qp_num); - - goto out; - } } -local: /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, - &recv->mad.mad, - &response->mad.mad); + (const struct ib_mad_hdr *)recv->mad, + recv->mad_size, + (struct ib_mad_hdr *)response->mad, + &mad_size, &resp_mad_pkey_index); + + if (opa) + wc->pkey_index = resp_mad_pkey_index; + if (ret & IB_MAD_RESULT_SUCCESS) { if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - agent_send_response(&response->mad.mad, + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, - qp_info->qp->qp_num); + qp_info->qp->qp_num, + mad_size, opa); goto out; } } } - mad_agent = find_mad_agent(port_priv, &recv->mad.mad); + mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* @@ -2065,17 +2281,17 @@ local: */ recv = NULL; } else if ((ret & IB_MAD_RESULT_SUCCESS) && - generate_unmatched_resp(recv, response)) { - agent_send_response(&response->mad.mad, &recv->grh, wc, - port_priv->device, port_num, qp_info->qp->qp_num); + generate_unmatched_resp(recv, response, &mad_size, opa)) { + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, + port_priv->device, port_num, + qp_info->qp->qp_num, mad_size, opa); } out: /* Post another receive request for this QP */ if (response) { ib_mad_post_receive_mads(qp_info, response); - if (recv) - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } else ib_mad_post_receive_mads(qp_info, recv); } @@ -2246,7 +2462,7 @@ retry: ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { - ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, &bad_send_wr); if (ret) { dev_err(&port_priv->device->dev, @@ -2304,7 +2520,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, struct ib_send_wr *bad_send_wr; mad_send_wr->retry = 0; - ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); if (ret) ib_mad_send_done_handler(port_priv, wc); @@ -2411,7 +2627,8 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + if (is_rmpp_data_mad(mad_agent_priv, + mad_send_wr->send_buf.mad) && &mad_send_wr->send_buf == send_buf) return mad_send_wr; } @@ -2468,10 +2685,14 @@ static void local_completions(struct work_struct *work) int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; + bool opa; mad_agent_priv = container_of(work, struct ib_mad_agent_private, local_work); + opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, @@ -2481,6 +2702,7 @@ static void local_completions(struct work_struct *work) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); free_mad = 0; if (local->mad_priv) { + u8 base_version; recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { dev_err(&mad_agent_priv->agent.device->dev, @@ -2496,17 +2718,26 @@ static void local_completions(struct work_struct *work) build_smp_wc(recv_mad_agent->agent.qp, (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - 0, recv_mad_agent->agent.port_num, &wc); + local->mad_send_wr->send_wr.pkey_index, + recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; - local->mad_priv->header.recv_wc.mad_len = - sizeof(struct ib_mad); + + base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; + if (opa && base_version == OPA_MGMT_BASE_VERSION) { + local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); list_add(&local->mad_priv->header.recv_wc.recv_buf.list, &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = - &local->mad_priv->mad.mad; + (struct ib_mad *)local->mad_priv->mad; if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) snoop_recv(recv_mad_agent->qp_info, &local->mad_priv->header.recv_wc, @@ -2534,7 +2765,7 @@ local_send_completion: spin_lock_irqsave(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); if (free_mad) - kmem_cache_free(ib_mad_cache, local->mad_priv); + kfree(local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -2649,8 +2880,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ - sg_list.length = sizeof *mad_priv - sizeof mad_priv->header; - sg_list.lkey = (*qp_info->port_priv->mr).lkey; + sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; /* Initialize common receive WR fields */ recv_wr.next = NULL; @@ -2663,7 +2893,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, mad_priv = mad; mad = NULL; } else { - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), + GFP_ATOMIC); if (!mad_priv) { dev_err(&qp_info->port_priv->device->dev, "No memory for receive buffer\n"); @@ -2671,10 +2902,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, break; } } + sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { @@ -2698,10 +2929,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&recv_queue->lock, flags); ib_dma_unmap_single(qp_info->port_priv->device, mad_priv->header.mapping, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); dev_err(&qp_info->port_priv->device->dev, "ib_post_recv failed: %d\n", ret); break; @@ -2738,10 +2968,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) ib_dma_unmap_single(qp_info->port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } qp_info->recv_queue.count = 0; @@ -2922,6 +3151,14 @@ static int ib_mad_port_open(struct ib_device *device, unsigned long flags; char name[sizeof "ib_mad123"]; int has_smi; + struct ib_cq_init_attr cq_attr = {}; + + if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) + return -EFAULT; + + if (WARN_ON(rdma_cap_opa_mad(device, port_num) && + rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) + return -EFAULT; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); @@ -2938,13 +3175,14 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; - has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; + cq_attr.cqe = cq_size; port_priv->cq = ib_create_cq(port_priv->device, ib_mad_thread_completion_handler, - NULL, port_priv, cq_size, 0); + NULL, port_priv, &cq_attr); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); @@ -2958,13 +3196,6 @@ static int ib_mad_port_open(struct ib_device *device, goto error4; } - port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(port_priv->mr)) { - dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n"); - ret = PTR_ERR(port_priv->mr); - goto error5; - } - if (has_smi) { ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); if (ret) @@ -3005,8 +3236,6 @@ error8: error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: - ib_dereg_mr(port_priv->mr); -error5: ib_dealloc_pd(port_priv->pd); error4: ib_destroy_cq(port_priv->cq); @@ -3041,7 +3270,6 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); - ib_dereg_mr(port_priv->mr); ib_dealloc_pd(port_priv->pd); ib_destroy_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); @@ -3055,20 +3283,14 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int start, end, i; + int start, i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + start = rdma_start_port(device); - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + for (i = start; i <= rdma_end_port(device); i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; - for (i = start; i <= end; i++) { if (ib_mad_port_open(device, i)) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; @@ -3086,40 +3308,31 @@ error_agent: dev_err(&device->dev, "Couldn't close port %d\n", i); error: - i--; + while (--i >= start) { + if (!rdma_cap_ib_mad(device, i)) + continue; - while (i >= start) { if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); - i--; } } -static void ib_mad_remove_device(struct ib_device *device) +static void ib_mad_remove_device(struct ib_device *device, void *client_data) { - int i, num_ports, cur_port; + int i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - num_ports = 1; - cur_port = 0; - } else { - num_ports = device->phys_port_cnt; - cur_port = 1; - } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_agent_port_close(device, cur_port)) + if (ib_agent_port_close(device, i)) dev_err(&device->dev, - "Couldn't close port %d for agents\n", - cur_port); - if (ib_mad_port_close(device, cur_port)) - dev_err(&device->dev, "Couldn't close port %d\n", - cur_port); + "Couldn't close port %d for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); } } @@ -3131,45 +3344,25 @@ static struct ib_client mad_client = { static int __init ib_mad_init_module(void) { - int ret; - mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); - ib_mad_cache = kmem_cache_create("ib_mad", - sizeof(struct ib_mad_private), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ib_mad_cache) { - pr_err("Couldn't create ib_mad cache\n"); - ret = -ENOMEM; - goto error1; - } - INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); - ret = -EINVAL; - goto error2; + return -EINVAL; } return 0; - -error2: - kmem_cache_destroy(ib_mad_cache); -error1: - return ret; } static void __exit ib_mad_cleanup_module(void) { ib_unregister_client(&mad_client); - kmem_cache_destroy(ib_mad_cache); } module_init(ib_mad_init_module); diff --git a/kernel/drivers/infiniband/core/mad_priv.h b/kernel/drivers/infiniband/core/mad_priv.h index d1a0b0ee9..990698a6a 100644 --- a/kernel/drivers/infiniband/core/mad_priv.h +++ b/kernel/drivers/infiniband/core/mad_priv.h @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> +#include <rdma/opa_smi.h> #define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ @@ -56,7 +57,7 @@ /* Registration table sizes */ #define MAX_MGMT_CLASS 80 -#define MAX_MGMT_VERSION 8 +#define MAX_MGMT_VERSION 0x83 #define MAX_MGMT_OUI 8 #define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \ IB_MGMT_CLASS_VENDOR_RANGE2_START + 1) @@ -75,12 +76,9 @@ struct ib_mad_private_header { struct ib_mad_private { struct ib_mad_private_header header; + size_t mad_size; struct ib_grh grh; - union { - struct ib_mad mad; - struct ib_rmpp_mad rmpp_mad; - struct ib_smp smp; - } mad; + u8 mad[0]; } __attribute__ ((packed)); struct ib_rmpp_segment { @@ -125,7 +123,7 @@ struct ib_mad_send_wr_private { struct ib_mad_send_buf send_buf; u64 header_mapping; u64 payload_mapping; - struct ib_send_wr send_wr; + struct ib_ud_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; @@ -150,6 +148,7 @@ struct ib_mad_local_private { struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; struct ib_mad_send_wr_private *mad_send_wr; + size_t return_wc_byte_len; }; struct ib_mad_mgmt_method_table { @@ -200,7 +199,6 @@ struct ib_mad_port_private { int port_num; struct ib_cq *cq; struct ib_pd *pd; - struct ib_mr *mr; spinlock_t reg_lock; struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; @@ -213,8 +211,8 @@ struct ib_mad_port_private { int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_send_wr_private * -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *mad_recv_wc); +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *mad_recv_wc); void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); diff --git a/kernel/drivers/infiniband/core/mad_rmpp.c b/kernel/drivers/infiniband/core/mad_rmpp.c index f37878c9c..382941b46 100644 --- a/kernel/drivers/infiniband/core/mad_rmpp.c +++ b/kernel/drivers/infiniband/core/mad_rmpp.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Intel Inc. All rights reserved. * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -67,6 +68,7 @@ struct mad_rmpp_recv { u8 mgmt_class; u8 class_version; u8 method; + u8 base_version; }; static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) @@ -139,7 +141,8 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, - 0, GFP_KERNEL); + 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) return; @@ -165,7 +168,8 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, - hdr_len, 0, GFP_KERNEL); + hdr_len, 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) ib_destroy_ah(ah); else { @@ -316,6 +320,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->mgmt_class = mad_hdr->mgmt_class; rmpp_recv->class_version = mad_hdr->class_version; rmpp_recv->method = mad_hdr->method; + rmpp_recv->base_version = mad_hdr->base_version; return rmpp_recv; error: kfree(rmpp_recv); @@ -431,14 +436,23 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) { struct ib_rmpp_mad *rmpp_mad; int hdr_size, data_size, pad; + bool opa = rdma_cap_opa_mad(rmpp_recv->agent->qp_info->port_priv->device, + rmpp_recv->agent->qp_info->port_priv->port_num); rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - data_size = sizeof(struct ib_rmpp_mad) - hdr_size; - pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (pad > IB_MGMT_RMPP_DATA || pad < 0) - pad = 0; + if (opa && rmpp_recv->base_version == OPA_MGMT_BASE_VERSION) { + data_size = sizeof(struct opa_rmpp_mad) - hdr_size; + pad = OPA_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > OPA_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } else { + data_size = sizeof(struct ib_rmpp_mad) - hdr_size; + pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > IB_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } return hdr_size + rmpp_recv->seg_num * data_size - pad; } @@ -570,13 +584,14 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA - - mad_send_wr->pad; + paylen = (mad_send_wr->send_buf.seg_count * + mad_send_wr->send_buf.seg_rmpp_size) - + mad_send_wr->pad; } if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; - paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; + paylen = mad_send_wr->send_buf.seg_rmpp_size - mad_send_wr->pad; } rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); diff --git a/kernel/drivers/infiniband/core/multicast.c b/kernel/drivers/infiniband/core/multicast.c index fa17b552f..bb6685fb0 100644 --- a/kernel/drivers/infiniband/core/multicast.c +++ b/kernel/drivers/infiniband/core/multicast.c @@ -43,7 +43,7 @@ #include "sa.h" static void mcast_add_one(struct ib_device *device); -static void mcast_remove_one(struct ib_device *device); +static void mcast_remove_one(struct ib_device *device, void *client_data); static struct ib_client mcast_client = { .name = "ib_multicast", @@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, u16 gid_index; u8 p; - ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index); + ret = ib_find_cached_gid(device, &rec->port_gid, + NULL, &p, &gid_index); if (ret) return ret; @@ -780,8 +781,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, int index; dev = container_of(handler, struct mcast_device, event_handler); - if (rdma_port_get_link_layer(dev->device, event->element.port_num) != - IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) return; index = event->element.port_num - dev->start_port; @@ -808,24 +808,16 @@ static void mcast_add_one(struct ib_device *device) int i; int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, GFP_KERNEL); if (!dev) return; - if (device->node_type == RDMA_NODE_IB_SWITCH) - dev->start_port = dev->end_port = 0; - else { - dev->start_port = 1; - dev->end_port = device->phys_port_cnt; - } + dev->start_port = rdma_start_port(device); + dev->end_port = rdma_end_port(device); for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_port_get_link_layer(device, dev->start_port + i) != - IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_mcast(device, dev->start_port + i)) continue; port = &dev->port[i]; port->dev = dev; @@ -849,13 +841,12 @@ static void mcast_add_one(struct ib_device *device) ib_register_event_handler(&dev->event_handler); } -static void mcast_remove_one(struct ib_device *device) +static void mcast_remove_one(struct ib_device *device, void *client_data) { - struct mcast_device *dev; + struct mcast_device *dev = client_data; struct mcast_port *port; int i; - dev = ib_get_client_data(device, &mcast_client); if (!dev) return; @@ -863,8 +854,7 @@ static void mcast_remove_one(struct ib_device *device) flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_port_get_link_layer(device, dev->start_port + i) == - IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_mcast(device, dev->start_port + i)) { port = &dev->port[i]; deref_port(port); wait_for_completion(&port->comp); diff --git a/kernel/drivers/infiniband/core/netlink.c b/kernel/drivers/infiniband/core/netlink.c index 23dd5a5c7..d47df9356 100644 --- a/kernel/drivers/infiniband/core/netlink.c +++ b/kernel/drivers/infiniband/core/netlink.c @@ -49,6 +49,14 @@ static DEFINE_MUTEX(ibnl_mutex); static struct sock *nls; static LIST_HEAD(client_list); +int ibnl_chk_listeners(unsigned int group) +{ + if (netlink_has_listeners(nls, group) == 0) + return -1; + return 0; +} +EXPORT_SYMBOL(ibnl_chk_listeners); + int ibnl_add_client(int index, int nops, const struct ibnl_client_cbs cb_table[]) { @@ -151,6 +159,23 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) !client->cb_table[op].dump) return -EINVAL; + /* + * For response or local service set_timeout request, + * there is no need to use netlink_dump_start. + */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || + (index == RDMA_NL_LS && + op == RDMA_NL_LS_OP_SET_TIMEOUT)) { + struct netlink_callback cb = { + .skb = skb, + .nlh = nlh, + .dump = client->cb_table[op].dump, + .module = client->cb_table[op].module, + }; + + return cb.dump(skb, &cb); + } + { struct netlink_dump_control c = { .dump = client->cb_table[op].dump, @@ -165,9 +190,39 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } +static void ibnl_rcv_reply_skb(struct sk_buff *skb) +{ + struct nlmsghdr *nlh; + int msglen; + + /* + * Process responses until there is no more message or the first + * request. Generally speaking, it is not recommended to mix responses + * with requests. + */ + while (skb->len >= nlmsg_total_size(0)) { + nlh = nlmsg_hdr(skb); + + if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) + return; + + /* Handle response only */ + if (nlh->nlmsg_flags & NLM_F_REQUEST) + return; + + ibnl_rcv_msg(skb, nlh); + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +} + static void ibnl_rcv(struct sk_buff *skb) { mutex_lock(&ibnl_mutex); + ibnl_rcv_reply_skb(skb); netlink_rcv_skb(skb, &ibnl_rcv_msg); mutex_unlock(&ibnl_mutex); } diff --git a/kernel/drivers/infiniband/core/opa_smi.h b/kernel/drivers/infiniband/core/opa_smi.h new file mode 100644 index 000000000..3bfab3505 --- /dev/null +++ b/kernel/drivers/infiniband/core/opa_smi.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __OPA_SMI_H_ +#define __OPA_SMI_H_ + +#include <rdma/ib_smi.h> +#include <rdma/opa_smi.h> + +#include "smi.h" + +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, + int port_num, int phys_port_cnt); +int opa_smi_get_fwd_port(struct opa_smp *smp); +extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); +extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, + bool is_switch, int port_num); + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp, + struct ib_device *device) +{ + /* C14-9:3 -- We're at the end of the DR segment of path */ + /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ + return (device->process_mad && + !opa_get_smp_direction(smp) && + (smp->hop_ptr == smp->hop_cnt + 1)) ? + IB_SMI_HANDLE : IB_SMI_DISCARD; +} + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *smp, + struct ib_device *device) +{ + /* C14-13:3 -- We're at the end of the DR segment of path */ + /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ + return (device->process_mad && + opa_get_smp_direction(smp) && + !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD; +} + +#endif /* __OPA_SMI_H_ */ diff --git a/kernel/drivers/infiniband/core/roce_gid_mgmt.c b/kernel/drivers/infiniband/core/roce_gid_mgmt.c new file mode 100644 index 000000000..178f98482 --- /dev/null +++ b/kernel/drivers/infiniband/core/roce_gid_mgmt.c @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "core_priv.h" + +#include <linux/in.h> +#include <linux/in6.h> + +/* For in6_dev_get/in6_dev_put */ +#include <net/addrconf.h> +#include <net/bonding.h> + +#include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> + +enum gid_op_type { + GID_DEL = 0, + GID_ADD +}; + +struct update_gid_event_work { + struct work_struct work; + union ib_gid gid; + struct ib_gid_attr gid_attr; + enum gid_op_type gid_op; +}; + +#define ROCE_NETDEV_CALLBACK_SZ 3 +struct netdev_event_work_cmd { + roce_netdev_callback cb; + roce_netdev_filter filter; + struct net_device *ndev; + struct net_device *filter_ndev; +}; + +struct netdev_event_work { + struct work_struct work; + struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ]; +}; + +static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, + u8 port, union ib_gid *gid, + struct ib_gid_attr *gid_attr) +{ + switch (gid_op) { + case GID_ADD: + ib_cache_gid_add(ib_dev, port, gid, gid_attr); + break; + case GID_DEL: + ib_cache_gid_del(ib_dev, port, gid, gid_attr); + break; + } +} + +enum bonding_slave_state { + BONDING_SLAVE_STATE_ACTIVE = 1UL << 0, + BONDING_SLAVE_STATE_INACTIVE = 1UL << 1, + /* No primary slave or the device isn't a slave in bonding */ + BONDING_SLAVE_STATE_NA = 1UL << 2, +}; + +static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev, + struct net_device *upper) +{ + if (upper && netif_is_bond_master(upper)) { + struct net_device *pdev = + bond_option_active_slave_get_rcu(netdev_priv(upper)); + + if (pdev) + return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE : + BONDING_SLAVE_STATE_INACTIVE; + } + + return BONDING_SLAVE_STATE_NA; +} + +static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper) +{ + struct net_device *_upper = NULL; + struct list_head *iter; + + netdev_for_each_all_upper_dev_rcu(dev, _upper, iter) + if (_upper == upper) + break; + + return _upper == upper; +} + +#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ + BONDING_SLAVE_STATE_NA) +static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = (struct net_device *)cookie; + struct net_device *real_dev; + int res; + + if (!rdma_ndev) + return 0; + + rcu_read_lock(); + real_dev = rdma_vlan_dev_real_dev(event_ndev); + if (!real_dev) + real_dev = event_ndev; + + res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) && + (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) & + REQUIRED_BOND_STATES)) || + real_dev == rdma_ndev); + + rcu_read_unlock(); + return res; +} + +static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *master_dev; + int res; + + if (!rdma_ndev) + return 0; + + rcu_read_lock(); + master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); + res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) == + BONDING_SLAVE_STATE_INACTIVE; + rcu_read_unlock(); + + return res; +} + +static int pass_all_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + return 1; +} + +static int upper_device_filter(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = (struct net_device *)cookie; + int res; + + if (!rdma_ndev) + return 0; + + if (rdma_ndev == event_ndev) + return 1; + + rcu_read_lock(); + res = is_upper_dev_rcu(rdma_ndev, event_ndev); + rcu_read_unlock(); + + return res; +} + +static void update_gid_ip(enum gid_op_type gid_op, + struct ib_device *ib_dev, + u8 port, struct net_device *ndev, + struct sockaddr *addr) +{ + union ib_gid gid; + struct ib_gid_attr gid_attr; + + rdma_ip2gid(addr, &gid); + memset(&gid_attr, 0, sizeof(gid_attr)); + gid_attr.ndev = ndev; + + update_gid(gid_op, ib_dev, port, &gid, &gid_attr); +} + +static void enum_netdev_default_gids(struct ib_device *ib_dev, + u8 port, struct net_device *event_ndev, + struct net_device *rdma_ndev) +{ + rcu_read_lock(); + if (!rdma_ndev || + ((rdma_ndev != event_ndev && + !is_upper_dev_rcu(rdma_ndev, event_ndev)) || + is_eth_active_slave_of_bonding_rcu(rdma_ndev, + netdev_master_upper_dev_get_rcu(rdma_ndev)) == + BONDING_SLAVE_STATE_INACTIVE)) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, + IB_CACHE_GID_DEFAULT_MODE_SET); +} + +static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, + u8 port, + struct net_device *event_ndev, + struct net_device *rdma_ndev) +{ + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); + + if (!rdma_ndev) + return; + + if (!real_dev) + real_dev = event_ndev; + + rcu_read_lock(); + + if (is_upper_dev_rcu(rdma_ndev, event_ndev) && + is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) == + BONDING_SLAVE_STATE_INACTIVE) { + rcu_read_unlock(); + + ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, + IB_CACHE_GID_DEFAULT_MODE_DELETE); + } else { + rcu_read_unlock(); + } +} + +static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, + u8 port, struct net_device *ndev) +{ + struct in_device *in_dev; + struct sin_list { + struct list_head list; + struct sockaddr_in ip; + }; + struct sin_list *sin_iter; + struct sin_list *sin_temp; + + LIST_HEAD(sin_list); + if (ndev->reg_state >= NETREG_UNREGISTERING) + return; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(ndev); + if (!in_dev) { + rcu_read_unlock(); + return; + } + + for_ifa(in_dev) { + struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + + if (!entry) { + pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n"); + continue; + } + entry->ip.sin_family = AF_INET; + entry->ip.sin_addr.s_addr = ifa->ifa_address; + list_add_tail(&entry->list, &sin_list); + } + endfor_ifa(in_dev); + rcu_read_unlock(); + + list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) { + update_gid_ip(GID_ADD, ib_dev, port, ndev, + (struct sockaddr *)&sin_iter->ip); + list_del(&sin_iter->list); + kfree(sin_iter); + } +} + +static void enum_netdev_ipv6_ips(struct ib_device *ib_dev, + u8 port, struct net_device *ndev) +{ + struct inet6_ifaddr *ifp; + struct inet6_dev *in6_dev; + struct sin6_list { + struct list_head list; + struct sockaddr_in6 sin6; + }; + struct sin6_list *sin6_iter; + struct sin6_list *sin6_temp; + struct ib_gid_attr gid_attr = {.ndev = ndev}; + LIST_HEAD(sin6_list); + + if (ndev->reg_state >= NETREG_UNREGISTERING) + return; + + in6_dev = in6_dev_get(ndev); + if (!in6_dev) + return; + + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + + if (!entry) { + pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n"); + continue; + } + + entry->sin6.sin6_family = AF_INET6; + entry->sin6.sin6_addr = ifp->addr; + list_add_tail(&entry->list, &sin6_list); + } + read_unlock_bh(&in6_dev->lock); + + in6_dev_put(in6_dev); + + list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) { + union ib_gid gid; + + rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid); + update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr); + list_del(&sin6_iter->list); + kfree(sin6_iter); + } +} + +static void _add_netdev_ips(struct ib_device *ib_dev, u8 port, + struct net_device *ndev) +{ + enum_netdev_ipv4_ips(ib_dev, port, ndev); + if (IS_ENABLED(CONFIG_IPV6)) + enum_netdev_ipv6_ips(ib_dev, port, ndev); +} + +static void add_netdev_ips(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = (struct net_device *)cookie; + + enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev); + _add_netdev_ips(ib_dev, port, event_ndev); +} + +static void del_netdev_ips(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = (struct net_device *)cookie; + + ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev); +} + +static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, + u8 port, + struct net_device *rdma_ndev, + void *cookie) +{ + struct net *net; + struct net_device *ndev; + + /* Lock the rtnl to make sure the netdevs does not move under + * our feet + */ + rtnl_lock(); + for_each_net(net) + for_each_netdev(net, ndev) + if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) + add_netdev_ips(ib_dev, port, rdma_ndev, ndev); + rtnl_unlock(); +} + +/* This function will rescan all of the network devices in the system + * and add their gids, as needed, to the relevant RoCE devices. */ +int roce_rescan_device(struct ib_device *ib_dev) +{ + ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL, + enum_all_gids_of_dev_cb, NULL); + + return 0; +} + +static void callback_for_addr_gid_device_scan(struct ib_device *device, + u8 port, + struct net_device *rdma_ndev, + void *cookie) +{ + struct update_gid_event_work *parsed = cookie; + + return update_gid(parsed->gid_op, device, + port, &parsed->gid, + &parsed->gid_attr); +} + +static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, + void *cookie, + void (*handle_netdev)(struct ib_device *ib_dev, + u8 port, + struct net_device *ndev)) +{ + struct net_device *ndev = (struct net_device *)cookie; + struct upper_list { + struct list_head list; + struct net_device *upper; + }; + struct net_device *upper; + struct list_head *iter; + struct upper_list *upper_iter; + struct upper_list *upper_temp; + LIST_HEAD(upper_list); + + rcu_read_lock(); + netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) { + struct upper_list *entry = kmalloc(sizeof(*entry), + GFP_ATOMIC); + + if (!entry) { + pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); + continue; + } + + list_add_tail(&entry->list, &upper_list); + dev_hold(upper); + entry->upper = upper; + } + rcu_read_unlock(); + + handle_netdev(ib_dev, port, ndev); + list_for_each_entry_safe(upper_iter, upper_temp, &upper_list, + list) { + handle_netdev(ib_dev, port, upper_iter->upper); + dev_put(upper_iter->upper); + list_del(&upper_iter->list); + kfree(upper_iter); + } +} + +static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, + struct net_device *event_ndev) +{ + ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev); +} + +static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids); +} + +static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips); +} + +static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, + void *cookie) +{ + struct net_device *master_ndev; + + rcu_read_lock(); + master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev); + if (master_ndev) + dev_hold(master_ndev); + rcu_read_unlock(); + + if (master_ndev) { + bond_delete_netdev_default_gids(ib_dev, port, master_ndev, + rdma_ndev); + dev_put(master_ndev); + } +} + +static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port, + struct net_device *rdma_ndev, void *cookie) +{ + struct net_device *event_ndev = (struct net_device *)cookie; + + bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev); +} + +/* The following functions operate on all IB devices. netdevice_event and + * addr_event execute ib_enum_all_roce_netdevs through a work. + * ib_enum_all_roce_netdevs iterates through all IB devices. + */ + +static void netdevice_event_work_handler(struct work_struct *_work) +{ + struct netdev_event_work *work = + container_of(_work, struct netdev_event_work, work); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) { + ib_enum_all_roce_netdevs(work->cmds[i].filter, + work->cmds[i].filter_ndev, + work->cmds[i].cb, + work->cmds[i].ndev); + dev_put(work->cmds[i].ndev); + dev_put(work->cmds[i].filter_ndev); + } + + kfree(work); +} + +static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, + struct net_device *ndev) +{ + unsigned int i; + struct netdev_event_work *ndev_work = + kmalloc(sizeof(*ndev_work), GFP_KERNEL); + + if (!ndev_work) { + pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n"); + return NOTIFY_DONE; + } + + memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds)); + for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) { + if (!ndev_work->cmds[i].ndev) + ndev_work->cmds[i].ndev = ndev; + if (!ndev_work->cmds[i].filter_ndev) + ndev_work->cmds[i].filter_ndev = ndev; + dev_hold(ndev_work->cmds[i].ndev); + dev_hold(ndev_work->cmds[i].filter_ndev); + } + INIT_WORK(&ndev_work->work, netdevice_event_work_handler); + + queue_work(ib_wq, &ndev_work->work); + + return NOTIFY_DONE; +} + +static const struct netdev_event_work_cmd add_cmd = { + .cb = add_netdev_ips, .filter = is_eth_port_of_netdev}; +static const struct netdev_event_work_cmd add_cmd_upper_ips = { + .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev}; + +static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info, + struct netdev_event_work_cmd *cmds) +{ + static const struct netdev_event_work_cmd upper_ips_del_cmd = { + .cb = del_netdev_upper_ips, .filter = upper_device_filter}; + static const struct netdev_event_work_cmd bonding_default_del_cmd = { + .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave}; + + if (changeupper_info->linking == false) { + cmds[0] = upper_ips_del_cmd; + cmds[0].ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd; + } else { + cmds[0] = bonding_default_del_cmd; + cmds[0].ndev = changeupper_info->upper_dev; + cmds[1] = add_cmd_upper_ips; + cmds[1].ndev = changeupper_info->upper_dev; + cmds[1].filter_ndev = changeupper_info->upper_dev; + } +} + +static int netdevice_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + static const struct netdev_event_work_cmd del_cmd = { + .cb = del_netdev_ips, .filter = pass_all_filter}; + static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { + .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; + static const struct netdev_event_work_cmd default_del_cmd = { + .cb = del_netdev_default_ips, .filter = pass_all_filter}; + static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { + .cb = del_netdev_upper_ips, .filter = upper_device_filter}; + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} }; + + if (ndev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + case NETDEV_UP: + cmds[0] = bonding_default_del_cmd_join; + cmds[1] = add_cmd; + break; + + case NETDEV_UNREGISTER: + if (ndev->reg_state < NETREG_UNREGISTERED) + cmds[0] = del_cmd; + else + return NOTIFY_DONE; + break; + + case NETDEV_CHANGEADDR: + cmds[0] = default_del_cmd; + cmds[1] = add_cmd; + break; + + case NETDEV_CHANGEUPPER: + netdevice_event_changeupper( + container_of(ptr, struct netdev_notifier_changeupper_info, info), + cmds); + break; + + case NETDEV_BONDING_FAILOVER: + cmds[0] = bonding_event_ips_del_cmd; + cmds[1] = bonding_default_del_cmd_join; + cmds[2] = add_cmd_upper_ips; + break; + + default: + return NOTIFY_DONE; + } + + return netdevice_queue_work(cmds, ndev); +} + +static void update_gid_event_work_handler(struct work_struct *_work) +{ + struct update_gid_event_work *work = + container_of(_work, struct update_gid_event_work, work); + + ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev, + callback_for_addr_gid_device_scan, work); + + dev_put(work->gid_attr.ndev); + kfree(work); +} + +static int addr_event(struct notifier_block *this, unsigned long event, + struct sockaddr *sa, struct net_device *ndev) +{ + struct update_gid_event_work *work; + enum gid_op_type gid_op; + + if (ndev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + gid_op = GID_ADD; + break; + + case NETDEV_DOWN: + gid_op = GID_DEL; + break; + + default: + return NOTIFY_DONE; + } + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); + return NOTIFY_DONE; + } + + INIT_WORK(&work->work, update_gid_event_work_handler); + + rdma_ip2gid(sa, &work->gid); + work->gid_op = gid_op; + + memset(&work->gid_attr, 0, sizeof(work->gid_attr)); + dev_hold(ndev); + work->gid_attr.ndev = ndev; + + queue_work(ib_wq, &work->work); + + return NOTIFY_DONE; +} + +static int inetaddr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct sockaddr_in in; + struct net_device *ndev; + struct in_ifaddr *ifa = ptr; + + in.sin_family = AF_INET; + in.sin_addr.s_addr = ifa->ifa_address; + ndev = ifa->ifa_dev->dev; + + return addr_event(this, event, (struct sockaddr *)&in, ndev); +} + +static int inet6addr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct sockaddr_in6 in6; + struct net_device *ndev; + struct inet6_ifaddr *ifa6 = ptr; + + in6.sin6_family = AF_INET6; + in6.sin6_addr = ifa6->addr; + ndev = ifa6->idev->dev; + + return addr_event(this, event, (struct sockaddr *)&in6, ndev); +} + +static struct notifier_block nb_netdevice = { + .notifier_call = netdevice_event +}; + +static struct notifier_block nb_inetaddr = { + .notifier_call = inetaddr_event +}; + +static struct notifier_block nb_inet6addr = { + .notifier_call = inet6addr_event +}; + +int __init roce_gid_mgmt_init(void) +{ + register_inetaddr_notifier(&nb_inetaddr); + if (IS_ENABLED(CONFIG_IPV6)) + register_inet6addr_notifier(&nb_inet6addr); + /* We relay on the netdevice notifier to enumerate all + * existing devices in the system. Register to this notifier + * last to make sure we will not miss any IP add/del + * callbacks. + */ + register_netdevice_notifier(&nb_netdevice); + + return 0; +} + +void __exit roce_gid_mgmt_cleanup(void) +{ + if (IS_ENABLED(CONFIG_IPV6)) + unregister_inet6addr_notifier(&nb_inet6addr); + unregister_inetaddr_notifier(&nb_inetaddr); + unregister_netdevice_notifier(&nb_netdevice); + /* Ensure all gid deletion tasks complete before we go down, + * to avoid any reference to free'd memory. By the time + * ib-core is removed, all physical devices have been removed, + * so no issue with remaining hardware contexts. + */ +} diff --git a/kernel/drivers/infiniband/core/sa_query.c b/kernel/drivers/infiniband/core/sa_query.c index c38f030f0..a95a32ba5 100644 --- a/kernel/drivers/infiniband/core/sa_query.c +++ b/kernel/drivers/infiniband/core/sa_query.c @@ -45,12 +45,21 @@ #include <uapi/linux/if_ether.h> #include <rdma/ib_pack.h> #include <rdma/ib_cache.h> +#include <rdma/rdma_netlink.h> +#include <net/netlink.h> +#include <uapi/rdma/ib_user_sa.h> +#include <rdma/ib_marshall.h> #include "sa.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); MODULE_LICENSE("Dual BSD/GPL"); +#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 +#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 +#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 +static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT; + struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; @@ -80,8 +89,16 @@ struct ib_sa_query { struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; int id; + u32 flags; + struct list_head list; /* Local svc request list */ + u32 seq; /* Local svc request sequence number */ + unsigned long timeout; /* Local svc timeout */ + u8 path_use; /* How will the pathrecord be used */ }; +#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 +#define IB_SA_CANCEL 0x00000002 + struct ib_sa_service_query { void (*callback)(int, struct ib_sa_service_rec *, void *); void *context; @@ -106,8 +123,28 @@ struct ib_sa_mcmember_query { struct ib_sa_query sa_query; }; +static LIST_HEAD(ib_nl_request_list); +static DEFINE_SPINLOCK(ib_nl_request_lock); +static atomic_t ib_nl_sa_request_seq; +static struct workqueue_struct *ib_nl_wq; +static struct delayed_work ib_nl_timed_work; +static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = { + [LS_NLA_TYPE_PATH_RECORD] = {.type = NLA_BINARY, + .len = sizeof(struct ib_path_rec_data)}, + [LS_NLA_TYPE_TIMEOUT] = {.type = NLA_U32}, + [LS_NLA_TYPE_SERVICE_ID] = {.type = NLA_U64}, + [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, + .len = sizeof(struct rdma_nla_ls_gid)}, + [LS_NLA_TYPE_SGID] = {.type = NLA_BINARY, + .len = sizeof(struct rdma_nla_ls_gid)}, + [LS_NLA_TYPE_TCLASS] = {.type = NLA_U8}, + [LS_NLA_TYPE_PKEY] = {.type = NLA_U16}, + [LS_NLA_TYPE_QOS_CLASS] = {.type = NLA_U16}, +}; + + static void ib_sa_add_one(struct ib_device *device); -static void ib_sa_remove_one(struct ib_device *device); +static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client sa_client = { .name = "sa", @@ -381,6 +418,429 @@ static const struct ib_field guidinfo_rec_table[] = { .size_bits = 512 }, }; +static inline void ib_sa_disable_local_svc(struct ib_sa_query *query) +{ + query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE; +} + +static inline int ib_sa_query_cancelled(struct ib_sa_query *query) +{ + return (query->flags & IB_SA_CANCEL); +} + +static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, + struct ib_sa_query *query) +{ + struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1]; + struct ib_sa_mad *mad = query->mad_buf->mad; + ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask; + u16 val16; + u64 val64; + struct rdma_ls_resolve_header *header; + + query->mad_buf->context[1] = NULL; + + /* Construct the family header first */ + header = (struct rdma_ls_resolve_header *) + skb_put(skb, NLMSG_ALIGN(sizeof(*header))); + memcpy(header->device_name, query->port->agent->device->name, + LS_DEVICE_NAME_MAX); + header->port_num = query->port->port_num; + + if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && + sa_rec->reversible != 0) + query->path_use = LS_RESOLVE_PATH_USE_GMP; + else + query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL; + header->path_use = query->path_use; + + /* Now build the attributes */ + if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) { + val64 = be64_to_cpu(sa_rec->service_id); + nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID, + sizeof(val64), &val64); + } + if (comp_mask & IB_SA_PATH_REC_DGID) + nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID, + sizeof(sa_rec->dgid), &sa_rec->dgid); + if (comp_mask & IB_SA_PATH_REC_SGID) + nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID, + sizeof(sa_rec->sgid), &sa_rec->sgid); + if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS) + nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS, + sizeof(sa_rec->traffic_class), &sa_rec->traffic_class); + + if (comp_mask & IB_SA_PATH_REC_PKEY) { + val16 = be16_to_cpu(sa_rec->pkey); + nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY, + sizeof(val16), &val16); + } + if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) { + val16 = be16_to_cpu(sa_rec->qos_class); + nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS, + sizeof(val16), &val16); + } +} + +static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) +{ + int len = 0; + + if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) + len += nla_total_size(sizeof(u64)); + if (comp_mask & IB_SA_PATH_REC_DGID) + len += nla_total_size(sizeof(struct rdma_nla_ls_gid)); + if (comp_mask & IB_SA_PATH_REC_SGID) + len += nla_total_size(sizeof(struct rdma_nla_ls_gid)); + if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS) + len += nla_total_size(sizeof(u8)); + if (comp_mask & IB_SA_PATH_REC_PKEY) + len += nla_total_size(sizeof(u16)); + if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) + len += nla_total_size(sizeof(u16)); + + /* + * Make sure that at least some of the required comp_mask bits are + * set. + */ + if (WARN_ON(len == 0)) + return len; + + /* Add the family header */ + len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header)); + + return len; +} + +static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + void *data; + int ret = 0; + struct ib_sa_mad *mad; + int len; + + mad = query->mad_buf->mad; + len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask); + if (len <= 0) + return -EMSGSIZE; + + skb = nlmsg_new(len, gfp_mask); + if (!skb) + return -ENOMEM; + + /* Put nlmsg header only for now */ + data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS, + RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST); + if (!data) { + kfree_skb(skb); + return -EMSGSIZE; + } + + /* Add attributes */ + ib_nl_set_path_rec_attrs(skb, query); + + /* Repair the nlmsg header length */ + nlmsg_end(skb, nlh); + + ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask); + if (!ret) + ret = len; + else + ret = 0; + + return ret; +} + +static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) +{ + unsigned long flags; + unsigned long delay; + int ret; + + INIT_LIST_HEAD(&query->list); + query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); + + /* Put the request on the list first.*/ + spin_lock_irqsave(&ib_nl_request_lock, flags); + delay = msecs_to_jiffies(sa_local_svc_timeout_ms); + query->timeout = delay + jiffies; + list_add_tail(&query->list, &ib_nl_request_list); + /* Start the timeout if this is the only request */ + if (ib_nl_request_list.next == &query->list) + queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + + ret = ib_nl_send_msg(query, gfp_mask); + if (ret <= 0) { + ret = -EIO; + /* Remove the request */ + spin_lock_irqsave(&ib_nl_request_lock, flags); + list_del(&query->list); + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + } else { + ret = 0; + } + + return ret; +} + +static int ib_nl_cancel_request(struct ib_sa_query *query) +{ + unsigned long flags; + struct ib_sa_query *wait_query; + int found = 0; + + spin_lock_irqsave(&ib_nl_request_lock, flags); + list_for_each_entry(wait_query, &ib_nl_request_list, list) { + /* Let the timeout to take care of the callback */ + if (query == wait_query) { + query->flags |= IB_SA_CANCEL; + query->timeout = jiffies; + list_move(&query->list, &ib_nl_request_list); + found = 1; + mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1); + break; + } + } + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + + return found; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc); + +static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, + const struct nlmsghdr *nlh) +{ + struct ib_mad_send_wc mad_send_wc; + struct ib_sa_mad *mad = NULL; + const struct nlattr *head, *curr; + struct ib_path_rec_data *rec; + int len, rem; + u32 mask = 0; + int status = -EIO; + + if (query->callback) { + head = (const struct nlattr *) nlmsg_data(nlh); + len = nlmsg_len(nlh); + switch (query->path_use) { + case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: + mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; + break; + + case LS_RESOLVE_PATH_USE_ALL: + case LS_RESOLVE_PATH_USE_GMP: + default: + mask = IB_PATH_PRIMARY | IB_PATH_GMP | + IB_PATH_BIDIRECTIONAL; + break; + } + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) { + rec = nla_data(curr); + /* + * Get the first one. In the future, we may + * need to get up to 6 pathrecords. + */ + if ((rec->flags & mask) == mask) { + mad = query->mad_buf->mad; + mad->mad_hdr.method |= + IB_MGMT_METHOD_RESP; + memcpy(mad->data, rec->path_rec, + sizeof(rec->path_rec)); + status = 0; + break; + } + } + } + query->callback(query, status, mad); + } + + mad_send_wc.send_buf = query->mad_buf; + mad_send_wc.status = IB_WC_SUCCESS; + send_handler(query->mad_buf->mad_agent, &mad_send_wc); +} + +static void ib_nl_request_timeout(struct work_struct *work) +{ + unsigned long flags; + struct ib_sa_query *query; + unsigned long delay; + struct ib_mad_send_wc mad_send_wc; + int ret; + + spin_lock_irqsave(&ib_nl_request_lock, flags); + while (!list_empty(&ib_nl_request_list)) { + query = list_entry(ib_nl_request_list.next, + struct ib_sa_query, list); + + if (time_after(query->timeout, jiffies)) { + delay = query->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); + break; + } + + list_del(&query->list); + ib_sa_disable_local_svc(query); + /* Hold the lock to protect against query cancellation */ + if (ib_sa_query_cancelled(query)) + ret = -1; + else + ret = ib_post_send_mad(query->mad_buf, NULL); + if (ret) { + mad_send_wc.send_buf = query->mad_buf; + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + send_handler(query->port->agent, &mad_send_wc); + spin_lock_irqsave(&ib_nl_request_lock, flags); + } + } + spin_unlock_irqrestore(&ib_nl_request_lock, flags); +} + +static int ib_nl_handle_set_timeout(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; + int timeout, delta, abs_delta; + const struct nlattr *attr; + unsigned long flags; + struct ib_sa_query *query; + long delay = 0; + struct nlattr *tb[LS_NLA_TYPE_MAX]; + int ret; + + if (!netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_policy); + attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT]; + if (ret || !attr) + goto settimeout_out; + + timeout = *(int *) nla_data(attr); + if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN) + timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN; + if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX) + timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX; + + delta = timeout - sa_local_svc_timeout_ms; + if (delta < 0) + abs_delta = -delta; + else + abs_delta = delta; + + if (delta != 0) { + spin_lock_irqsave(&ib_nl_request_lock, flags); + sa_local_svc_timeout_ms = timeout; + list_for_each_entry(query, &ib_nl_request_list, list) { + if (delta < 0 && abs_delta > query->timeout) + query->timeout = 0; + else + query->timeout += delta; + + /* Get the new delay from the first entry */ + if (!delay) { + delay = query->timeout - jiffies; + if (delay <= 0) + delay = 1; + } + } + if (delay) + mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, + (unsigned long)delay); + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + } + +settimeout_out: + return skb->len; +} + +static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) +{ + struct nlattr *tb[LS_NLA_TYPE_MAX]; + int ret; + + if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) + return 0; + + ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_policy); + if (ret) + return 0; + + return 1; +} + +static int ib_nl_handle_resolve_resp(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; + unsigned long flags; + struct ib_sa_query *query; + struct ib_mad_send_buf *send_buf; + struct ib_mad_send_wc mad_send_wc; + int found = 0; + int ret; + + if (!netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_irqsave(&ib_nl_request_lock, flags); + list_for_each_entry(query, &ib_nl_request_list, list) { + /* + * If the query is cancelled, let the timeout routine + * take care of it. + */ + if (nlh->nlmsg_seq == query->seq) { + found = !ib_sa_query_cancelled(query); + if (found) + list_del(&query->list); + break; + } + } + + if (!found) { + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + goto resp_out; + } + + send_buf = query->mad_buf; + + if (!ib_nl_is_good_resolve_resp(nlh)) { + /* if the result is a failure, send out the packet via IB */ + ib_sa_disable_local_svc(query); + ret = ib_post_send_mad(query->mad_buf, NULL); + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + if (ret) { + mad_send_wc.send_buf = send_buf; + mad_send_wc.status = IB_WC_GENERAL_ERR; + send_handler(query->port->agent, &mad_send_wc); + } + } else { + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + ib_nl_process_good_resolve_rsp(query, nlh); + } + +resp_out: + return skb->len; +} + +static struct ibnl_client_cbs ib_sa_cb_table[] = { + [RDMA_NL_LS_OP_RESOLVE] = { + .dump = ib_nl_handle_resolve_resp, + .module = THIS_MODULE }, + [RDMA_NL_LS_OP_SET_TIMEOUT] = { + .dump = ib_nl_handle_set_timeout, + .module = THIS_MODULE }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -450,7 +910,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; - if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_sa(handler->device, port->port_num)) return; spin_lock_irqsave(&port->ah_lock, flags); @@ -502,7 +962,13 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); - ib_cancel_mad(agent, mad_buf); + /* + * If the query is still on the netlink request list, schedule + * it to be cancelled by the timeout routine. Otherwise, it has been + * sent to the MAD layer and has to be cancelled from there. + */ + if (!ib_nl_cancel_request(query)) + ib_cancel_mad(agent, mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); @@ -540,29 +1006,32 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; - force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; + force_grh = rdma_cap_eth_ah(device, port_num); if (rec->hop_limit > 1 || force_grh) { + struct net_device *ndev = ib_get_ndev_from_path(rec); + ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; - ret = ib_find_cached_gid(device, &rec->sgid, &port_num, + ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num, &gid_index); - if (ret) + if (ret) { + if (ndev) + dev_put(ndev); return ret; + } ah_attr->grh.sgid_index = gid_index; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; + if (ndev) + dev_put(ndev); } if (force_grh) { memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); - ah_attr->vlan_id = rec->vlan_id; - } else { - ah_attr->vlan_id = 0xffff; } - return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); @@ -583,7 +1052,8 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) query->mad_buf = ib_create_send_mad(query->port->agent, 1, query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, - gfp_mask); + gfp_mask, + IB_MGMT_BASE_VERSION); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; @@ -618,7 +1088,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { - bool preload = !!(gfp_mask & __GFP_WAIT); + bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret, id; @@ -638,6 +1108,14 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) query->mad_buf->context[0] = query; query->id = id; + if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) { + if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) { + if (!ib_nl_make_request(query, gfp_mask)) + return id; + } + ib_sa_disable_local_svc(query); + } + ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { spin_lock_irqsave(&idr_lock, flags); @@ -677,9 +1155,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); - rec.vlan_id = 0xffff; + rec.net = NULL; + rec.ifindex = 0; memset(rec.dmac, 0, ETH_ALEN); - memset(rec.smac, 0, ETH_ALEN); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); @@ -739,7 +1217,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; - query = kmalloc(sizeof *query, gfp_mask); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -766,6 +1244,9 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, *sa_query = &query->sa_query; + query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE; + query->sa_query.mad_buf->context[1] = rec; + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; @@ -861,7 +1342,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, method != IB_SA_METHOD_DELETE) return -EINVAL; - query = kmalloc(sizeof *query, gfp_mask); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -953,7 +1434,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; - query = kmalloc(sizeof *query, gfp_mask); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -1050,7 +1531,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; - query = kmalloc(sizeof *query, gfp_mask); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -1153,16 +1634,10 @@ static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; + int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), @@ -1175,7 +1650,7 @@ static void ib_sa_add_one(struct ib_device *device) for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); - if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_sa(device, i + 1)) continue; sa_dev->port[i].sm_ah = NULL; @@ -1189,8 +1664,13 @@ static void ib_sa_add_one(struct ib_device *device) goto err; INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); + + count++; } + if (!count) + goto free; + ib_set_client_data(device, &sa_client, sa_dev); /* @@ -1204,25 +1684,26 @@ static void ib_sa_add_one(struct ib_device *device) if (ib_register_event_handler(&sa_dev->event_handler)) goto err; - for (i = 0; i <= e - s; ++i) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + for (i = 0; i <= e - s; ++i) { + if (rdma_cap_ib_sa(device, i + 1)) update_sm_ah(&sa_dev->port[i].update_task); + } return; err: - while (--i >= 0) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + while (--i >= 0) { + if (rdma_cap_ib_sa(device, i + 1)) ib_unregister_mad_agent(sa_dev->port[i].agent); - + } +free: kfree(sa_dev); - return; } -static void ib_sa_remove_one(struct ib_device *device) +static void ib_sa_remove_one(struct ib_device *device, void *client_data) { - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_device *sa_dev = client_data; int i; if (!sa_dev) @@ -1233,7 +1714,7 @@ static void ib_sa_remove_one(struct ib_device *device) flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_sa(device, i + 1)) { ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); @@ -1250,6 +1731,8 @@ static int __init ib_sa_init(void) get_random_bytes(&tid, sizeof tid); + atomic_set(&ib_nl_sa_request_seq, 0); + ret = ib_register_client(&sa_client); if (ret) { printk(KERN_ERR "Couldn't register ib_sa client\n"); @@ -1262,7 +1745,25 @@ static int __init ib_sa_init(void) goto err2; } + ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq"); + if (!ib_nl_wq) { + ret = -ENOMEM; + goto err3; + } + + if (ibnl_add_client(RDMA_NL_LS, RDMA_NL_LS_NUM_OPS, + ib_sa_cb_table)) { + pr_err("Failed to add netlink callback\n"); + ret = -EINVAL; + goto err4; + } + INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); + return 0; +err4: + destroy_workqueue(ib_nl_wq); +err3: + mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: @@ -1271,6 +1772,10 @@ err1: static void __exit ib_sa_cleanup(void) { + ibnl_remove_client(RDMA_NL_LS); + cancel_delayed_work(&ib_nl_timed_work); + flush_workqueue(ib_nl_wq); + destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr); diff --git a/kernel/drivers/infiniband/core/smi.c b/kernel/drivers/infiniband/core/smi.c index 5855e4405..f19b23817 100644 --- a/kernel/drivers/infiniband/core/smi.c +++ b/kernel/drivers/infiniband/core/smi.c @@ -5,6 +5,7 @@ * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,85 +39,82 @@ #include <rdma/ib_smi.h> #include "smi.h" - -/* - * Fixup a directed route SMP for sending - * Return 0 if the SMP should be discarded - */ -enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num) +#include "opa_smi.h" + +static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num, + u8 *hop_ptr, u8 hop_cnt, + const u8 *initial_path, + const u8 *return_path, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) { - u8 hop_ptr, hop_cnt; - - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; - /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; - if (!ib_get_smp_direction(smp)) { + if (!direction) { /* C14-9:1 */ - if (hop_cnt && hop_ptr == 0) { - smp->hop_ptr++; - return (smp->initial_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == 0) { + (*hop_ptr)++; + return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:2 */ - if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (*hop_ptr && *hop_ptr < hop_cnt) { + if (!is_switch) return IB_SMI_DISCARD; - /* smp->return_path set when received */ - smp->hop_ptr++; - return (smp->initial_path[smp->hop_ptr] == + /* return_path set when received */ + (*hop_ptr)++; + return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == hop_cnt) { - /* smp->return_path set when received */ - smp->hop_ptr++; - return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE ? + if (*hop_ptr == hop_cnt) { + /* return_path set when received */ + (*hop_ptr)++; + return (is_switch || + dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- Fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ - if (hop_cnt && hop_ptr == hop_cnt + 1) { - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == hop_cnt + 1) { + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ - if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { + if (!is_switch) return IB_SMI_DISCARD; - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- at the end of the DR segment of path */ - if (hop_ptr == 1) { - smp->hop_ptr--; + if (*hop_ptr == 1) { + (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_slid == IB_LID_PERMISSIVE ? + return (is_switch || + dr_slid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ - if (hop_ptr == 0) + if (*hop_ptr == 0) return IB_SMI_HANDLE; /* C14-13:5 -- Check for unreasonable hop pointer */ @@ -125,105 +123,163 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, } /* - * Adjust information for a received SMP - * Return 0 if the SMP should be dropped + * Fixup a directed route SMP for sending + * Return IB_SMI_DISCARD if the SMP should be discarded */ -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, - int port_num, int phys_port_cnt) +enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, + bool is_switch, int port_num) { - u8 hop_ptr, hop_cnt; + return __smi_handle_dr_smp_send(is_switch, port_num, + &smp->hop_ptr, smp->hop_cnt, + smp->initial_path, + smp->return_path, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; +enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, + bool is_switch, int port_num) +{ + return __smi_handle_dr_smp_send(is_switch, port_num, + &smp->hop_ptr, smp->hop_cnt, + smp->route.dr.initial_path, + smp->route.dr.return_path, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); +} +static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num, + int phys_port_cnt, + u8 *hop_ptr, u8 hop_cnt, + const u8 *initial_path, + u8 *return_path, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) +{ /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; - if (!ib_get_smp_direction(smp)) { + if (!direction) { /* C14-9:1 -- sender should have incremented hop_ptr */ - if (hop_cnt && hop_ptr == 0) + if (hop_cnt && *hop_ptr == 0) return IB_SMI_DISCARD; /* C14-9:2 -- intermediate hop */ - if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (*hop_ptr && *hop_ptr < hop_cnt) { + if (!is_switch) return IB_SMI_DISCARD; - smp->return_path[hop_ptr] = port_num; - /* smp->hop_ptr updated when sending */ - return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ? + return_path[*hop_ptr] = port_num; + /* hop_ptr updated when sending */ + return (initial_path[*hop_ptr+1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == hop_cnt) { + if (*hop_ptr == hop_cnt) { if (hop_cnt) - smp->return_path[hop_ptr] = port_num; - /* smp->hop_ptr updated when sending */ + return_path[*hop_ptr] = port_num; + /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE ? + return (is_switch || + dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ - if (hop_cnt && hop_ptr == hop_cnt + 1) { - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == hop_cnt + 1) { + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ - if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { + if (!is_switch) return IB_SMI_DISCARD; - /* smp->hop_ptr updated when sending */ - return (smp->return_path[hop_ptr-1] <= phys_port_cnt ? + /* hop_ptr updated when sending */ + return (return_path[*hop_ptr-1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == 1) { - if (smp->dr_slid == IB_LID_PERMISSIVE) { + if (*hop_ptr == 1) { + if (dr_slid_is_permissive) { /* giving SMP to SM - update hop_ptr */ - smp->hop_ptr--; + (*hop_ptr)--; return IB_SMI_HANDLE; } - /* smp->hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH ? - IB_SMI_HANDLE : IB_SMI_DISCARD); + /* hop_ptr updated when sending */ + return (is_switch ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ /* C14-13:5 -- Check for unreasonable hop pointer */ - return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } } -enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) +/* + * Adjust information for a received SMP + * Return IB_SMI_DISCARD if the SMP should be dropped + */ +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, + int port_num, int phys_port_cnt) { - u8 hop_ptr, hop_cnt; + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, + &smp->hop_ptr, smp->hop_cnt, + smp->initial_path, + smp->return_path, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; +/* + * Adjust information for a received SMP + * Return IB_SMI_DISCARD if the SMP should be dropped + */ +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, + int port_num, int phys_port_cnt) +{ + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, + &smp->hop_ptr, smp->hop_cnt, + smp->route.dr.initial_path, + smp->route.dr.return_path, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); +} - if (!ib_get_smp_direction(smp)) { +static enum smi_forward_action __smi_check_forward_dr_smp(u8 hop_ptr, u8 hop_cnt, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) +{ + if (!direction) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) return IB_SMI_FORWARD; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) - return (smp->dr_dlid == IB_LID_PERMISSIVE ? + return (dr_dlid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ @@ -236,10 +292,29 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) - return (smp->dr_slid != IB_LID_PERMISSIVE ? + return (!dr_slid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); } return IB_SMI_LOCAL; + +} + +enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) +{ + return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} + +enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp) +{ + return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); } /* @@ -251,3 +326,13 @@ int smi_get_fwd_port(struct ib_smp *smp) return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] : smp->return_path[smp->hop_ptr-1]); } + +/* + * Return the forwarding port number from initial_path for outgoing SMP and + * from return_path for returning SMP + */ +int opa_smi_get_fwd_port(struct opa_smp *smp) +{ + return !opa_get_smp_direction(smp) ? smp->route.dr.initial_path[smp->hop_ptr+1] : + smp->route.dr.return_path[smp->hop_ptr-1]; +} diff --git a/kernel/drivers/infiniband/core/smi.h b/kernel/drivers/infiniband/core/smi.h index aff96bac4..33c91c8a1 100644 --- a/kernel/drivers/infiniband/core/smi.h +++ b/kernel/drivers/infiniband/core/smi.h @@ -51,12 +51,12 @@ enum smi_forward_action { IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/kernel/drivers/infiniband/core/sysfs.c b/kernel/drivers/infiniband/core/sysfs.c index cbd0383f6..b1f37d409 100644 --- a/kernel/drivers/infiniband/core/sysfs.c +++ b/kernel/drivers/infiniband/core/sysfs.c @@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, union ib_gid gid; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); + ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); if (ret) return ret; @@ -326,6 +326,8 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, int width = (tab_attr->index >> 16) & 0xff; struct ib_mad *in_mad = NULL; struct ib_mad *out_mad = NULL; + size_t mad_size = sizeof(*out_mad); + u16 out_mad_pkey_index = 0; ssize_t ret; if (!p->ibdev->process_mad) @@ -347,7 +349,10 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, in_mad->data[41] = p->port_num; /* PortSelect field */ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY, - p->port_num, NULL, NULL, in_mad, out_mad) & + p->port_num, NULL, NULL, + (const struct ib_mad_hdr *)in_mad, mad_size, + (struct ib_mad_hdr *)out_mad, &mad_size, + &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; @@ -452,28 +457,6 @@ static struct kobj_type port_type = { .default_attrs = port_default_attrs }; -static void ib_device_release(struct device *device) -{ - struct ib_device *dev = container_of(device, struct ib_device, dev); - - kfree(dev); -} - -static int ib_device_uevent(struct device *device, - struct kobj_uevent_env *env) -{ - struct ib_device *dev = container_of(device, struct ib_device, dev); - - if (add_uevent_var(env, "NAME=%s", dev->name)) - return -ENOMEM; - - /* - * It would be nice to pass the node GUID with the event... - */ - - return 0; -} - static struct attribute ** alloc_group_attrs(ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf), @@ -696,12 +679,6 @@ static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_desc }; -static struct class ib_class = { - .name = "infiniband", - .dev_release = ib_device_release, - .dev_uevent = ib_device_uevent, -}; - /* Show a given an attribute in the statistics group */ static ssize_t show_protocol_stat(const struct device *device, struct device_attribute *attr, char *buf, @@ -840,14 +817,12 @@ int ib_device_register_sysfs(struct ib_device *device, int ret; int i; - class_dev->class = &ib_class; - class_dev->parent = device->dma_device; - dev_set_name(class_dev, "%s", device->name); - dev_set_drvdata(class_dev, device); - - INIT_LIST_HEAD(&device->port_list); + device->dev.parent = device->dma_device; + ret = dev_set_name(class_dev, "%s", device->name); + if (ret) + return ret; - ret = device_register(class_dev); + ret = device_add(class_dev); if (ret) goto err; @@ -864,7 +839,7 @@ int ib_device_register_sysfs(struct ib_device *device, goto err_put; } - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { ret = add_port(device, 0, port_callback); if (ret) goto err_put; @@ -910,13 +885,3 @@ void ib_device_unregister_sysfs(struct ib_device *device) device_unregister(&device->dev); } - -int ib_sysfs_setup(void) -{ - return class_register(&ib_class); -} - -void ib_sysfs_cleanup(void) -{ - class_unregister(&ib_class); -} diff --git a/kernel/drivers/infiniband/core/ucm.c b/kernel/drivers/infiniband/core/ucm.c index f2f63933e..6b4e8a008 100644 --- a/kernel/drivers/infiniband/core/ucm.c +++ b/kernel/drivers/infiniband/core/ucm.c @@ -109,7 +109,7 @@ enum { #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); -static void ib_ucm_remove_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device, void *client_data); static struct ib_client ucm_client = { .name = "ucm", @@ -658,8 +658,7 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file, if (result) goto out; - result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask, - NULL); + result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); out: ib_ucm_ctx_put(ctx); return result; @@ -1193,6 +1192,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; @@ -1202,7 +1202,7 @@ static void ib_ucm_release_dev(struct device *dev) if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else - clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); kfree(ucm_dev); } @@ -1226,7 +1226,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static int find_overflow_devnum(void) { int ret; @@ -1253,8 +1252,7 @@ static void ib_ucm_add_one(struct ib_device *device) dev_t base; struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext || - rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); @@ -1311,9 +1309,9 @@ err: return; } -static void ib_ucm_remove_one(struct ib_device *device) +static void ib_ucm_remove_one(struct ib_device *device, void *client_data) { - struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); + struct ib_ucm_device *ucm_dev = client_data; if (!ucm_dev) return; diff --git a/kernel/drivers/infiniband/core/ucma.c b/kernel/drivers/infiniband/core/ucma.c index 45d67e922..8b5a934e1 100644 --- a/kernel/drivers/infiniband/core/ucma.c +++ b/kernel/drivers/infiniband/core/ucma.c @@ -42,6 +42,7 @@ #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/module.h> +#include <linux/nsproxy.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -74,6 +75,7 @@ struct ucma_file { struct list_head ctx_list; struct list_head event_list; wait_queue_head_t poll_wait; + struct workqueue_struct *close_wq; }; struct ucma_context { @@ -89,6 +91,13 @@ struct ucma_context { struct list_head list; struct list_head mc_list; + /* mark that device is in process of destroying the internal HW + * resources, protected by the global mut + */ + int closing; + /* sync between removal event and id destroy, protected by file mut */ + int destroying; + struct work_struct close_work; }; struct ucma_multicast { @@ -107,6 +116,7 @@ struct ucma_event { struct list_head list; struct rdma_cm_id *cm_id; struct rdma_ucm_event_resp resp; + struct work_struct close_work; }; static DEFINE_MUTEX(mut); @@ -132,8 +142,12 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) mutex_lock(&mut); ctx = _ucma_find_context(id, file); - if (!IS_ERR(ctx)) - atomic_inc(&ctx->ref); + if (!IS_ERR(ctx)) { + if (ctx->closing) + ctx = ERR_PTR(-EIO); + else + atomic_inc(&ctx->ref); + } mutex_unlock(&mut); return ctx; } @@ -144,6 +158,28 @@ static void ucma_put_ctx(struct ucma_context *ctx) complete(&ctx->comp); } +static void ucma_close_event_id(struct work_struct *work) +{ + struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); + + rdma_destroy_id(uevent_close->cm_id); + kfree(uevent_close); +} + +static void ucma_close_id(struct work_struct *work) +{ + struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); + + /* once all inflight tasks are finished, we close all underlying + * resources. The context is still alive till its explicit destryoing + * by its creator. + */ + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + /* No new events will be generated after destroying the id. */ + rdma_destroy_id(ctx->cm_id); +} + static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; @@ -152,6 +188,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) if (!ctx) return NULL; + INIT_WORK(&ctx->close_work, ucma_close_id); atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); @@ -242,6 +279,44 @@ static void ucma_set_event_context(struct ucma_context *ctx, } } +/* Called with file->mut locked for the relevant context. */ +static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) +{ + struct ucma_context *ctx = cm_id->context; + struct ucma_event *con_req_eve; + int event_found = 0; + + if (ctx->destroying) + return; + + /* only if context is pointing to cm_id that it owns it and can be + * queued to be closed, otherwise that cm_id is an inflight one that + * is part of that context event list pending to be detached and + * reattached to its new context as part of ucma_get_event, + * handled separately below. + */ + if (ctx->cm_id == cm_id) { + mutex_lock(&mut); + ctx->closing = 1; + mutex_unlock(&mut); + queue_work(ctx->file->close_wq, &ctx->close_work); + return; + } + + list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { + if (con_req_eve->cm_id == cm_id && + con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { + list_del(&con_req_eve->list); + INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); + queue_work(ctx->file->close_wq, &con_req_eve->close_work); + event_found = 1; + break; + } + } + if (!event_found) + printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n"); +} + static int ucma_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { @@ -276,14 +351,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, * We ignore events for new connections until userspace has set * their context. This can only happen if an error occurs on a * new connection before the user accepts it. This is okay, - * since the accept will just fail later. + * since the accept will just fail later. However, we do need + * to release the underlying HW resources in case of a device + * removal event. */ + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ucma_removal_event_handler(cm_id); + kfree(uevent); goto out; } list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ucma_removal_event_handler(cm_id); out: mutex_unlock(&ctx->file->mut); return ret; @@ -391,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); + ctx->cm_id = rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; @@ -442,9 +525,15 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) } /* - * We cannot hold file->mut when calling rdma_destroy_id() or we can - * deadlock. We also acquire file->mut in ucma_event_handler(), and - * rdma_destroy_id() will wait until all callbacks have completed. + * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At + * this point, no new events will be reported from the hardware. However, we + * still need to cleanup the UCMA context for this ID. Specifically, there + * might be events that have not yet been consumed by the user space software. + * These might include pending connect requests which we have not completed + * processing. We cannot call rdma_destroy_id while holding the lock of the + * context (file->mut), as it might cause a deadlock. We therefore extract all + * relevant events from the context pending events list while holding the + * mutex. After that we release them as needed. */ static int ucma_free_ctx(struct ucma_context *ctx) { @@ -452,8 +541,6 @@ static int ucma_free_ctx(struct ucma_context *ctx) struct ucma_event *uevent, *tmp; LIST_HEAD(list); - /* No new events will be generated after destroying the id. */ - rdma_destroy_id(ctx->cm_id); ucma_cleanup_multicast(ctx); @@ -501,10 +588,24 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ucma_put_ctx(ctx); - wait_for_completion(&ctx->comp); - resp.events_reported = ucma_free_ctx(ctx); + mutex_lock(&ctx->file->mut); + ctx->destroying = 1; + mutex_unlock(&ctx->file->mut); + flush_workqueue(ctx->file->close_wq); + /* At this point it's guaranteed that there is no inflight + * closing task */ + mutex_lock(&mut); + if (!ctx->closing) { + mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + rdma_destroy_id(ctx->cm_id); + } else { + mutex_unlock(&mut); + } + + resp.events_reported = ucma_free_ctx(ctx); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; @@ -722,26 +823,13 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(ctx->cm_id->device, - ctx->cm_id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - case IB_LINK_LAYER_ETHERNET: - ucma_copy_iboe_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } - break; - case RDMA_TRANSPORT_IWARP: + + if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } out: if (copy_to_user((void __user *)(unsigned long)cmd.response, @@ -1125,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx, return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); - sa_path.vlan_id = 0xffff; ib_sa_unpack_path(path_data->path_rec, &sa_path); ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); @@ -1334,10 +1421,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) mc = ERR_PTR(-EINVAL); - else { + else if (!atomic_inc_not_zero(&mc->ctx->ref)) + mc = ERR_PTR(-ENXIO); + else idr_remove(&multicast_idr, mc->id); - atomic_inc(&mc->ctx->ref); - } mutex_unlock(&mut); if (IS_ERR(mc)) { @@ -1367,10 +1454,10 @@ static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); - mutex_lock(&file2->mut); + mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); - mutex_lock(&file1->mut); + mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } @@ -1538,6 +1625,12 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; + file->close_wq = create_singlethread_workqueue("ucma_close_id"); + if (!file->close_wq) { + kfree(file); + return -ENOMEM; + } + INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); @@ -1556,16 +1649,34 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&file->mut); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { + ctx->destroying = 1; mutex_unlock(&file->mut); mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); + flush_workqueue(file->close_wq); + /* At that step once ctx was marked as destroying and workqueue + * was flushed we are safe from any inflights handlers that + * might put other closing task. + */ + mutex_lock(&mut); + if (!ctx->closing) { + mutex_unlock(&mut); + /* rdma_destroy_id ensures that no event handlers are + * inflight for that id before releasing it. + */ + rdma_destroy_id(ctx->cm_id); + } else { + mutex_unlock(&mut); + } + ucma_free_ctx(ctx); mutex_lock(&file->mut); } mutex_unlock(&file->mut); + destroy_workqueue(file->close_wq); kfree(file); return 0; } @@ -1629,6 +1740,7 @@ static void __exit ucma_cleanup(void) device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); + idr_destroy(&multicast_idr); } module_init(ucma_init); diff --git a/kernel/drivers/infiniband/core/user_mad.c b/kernel/drivers/infiniband/core/user_mad.c index 928cdd20e..57f281f8d 100644 --- a/kernel/drivers/infiniband/core/user_mad.c +++ b/kernel/drivers/infiniband/core/user_mad.c @@ -99,7 +99,6 @@ struct ib_umad_port { }; struct ib_umad_device { - int start_port, end_port; struct kobject kobj; struct ib_umad_port port[0]; }; @@ -134,7 +133,7 @@ static DEFINE_SPINLOCK(port_lock); static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); -static void ib_umad_remove_one(struct ib_device *device); +static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_release_dev(struct kobject *kobj) { @@ -263,20 +262,23 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; + size_t seg_size; - /* We need enough room to copy the first (or only) MAD segment. */ recv_buf = &packet->recv_wc->recv_buf; - if ((packet->length <= sizeof (*recv_buf->mad) && + seg_size = packet->recv_wc->mad_seg_size; + + /* We need enough room to copy the first (or only) MAD segment. */ + if ((packet->length <= seg_size && count < hdr_size(file) + packet->length) || - (packet->length > sizeof (*recv_buf->mad) && - count < hdr_size(file) + sizeof (*recv_buf->mad))) + (packet->length > seg_size && + count < hdr_size(file) + seg_size)) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); - seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + seg_payload = min_t(int, packet->length, seg_size); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; @@ -293,7 +295,7 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, return -ENOSPC; } offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); - max_seg_payload = sizeof (struct ib_mad) - offset; + max_seg_payload = seg_size - offset; for (left = packet->length - seg_payload, buf += seg_payload; left; left -= seg_payload, buf += seg_payload) { @@ -426,11 +428,11 @@ static int is_duplicate(struct ib_umad_file *file, * the same TID, reject the second as a duplicate. This is more * restrictive than required by the spec. */ - if (!ib_response_mad((struct ib_mad *) hdr)) { - if (!ib_response_mad((struct ib_mad *) sent_hdr)) + if (!ib_response_mad(hdr)) { + if (!ib_response_mad(sent_hdr)) return 1; continue; - } else if (!ib_response_mad((struct ib_mad *) sent_hdr)) + } else if (!ib_response_mad(sent_hdr)) continue; if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) @@ -451,6 +453,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; + u8 base_version; if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; @@ -517,11 +520,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_active = 0; } + base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version; data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), packet->mad.hdr.pkey_index, rmpp_active, - hdr_len, data_len, GFP_KERNEL); + hdr_len, data_len, GFP_KERNEL, + base_version); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; @@ -1273,16 +1278,10 @@ static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; + int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), @@ -1292,38 +1291,49 @@ static void ib_umad_add_one(struct ib_device *device) kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); - umad_dev->start_port = s; - umad_dev->end_port = e; - for (i = s; i <= e; ++i) { + if (!rdma_cap_ib_mad(device, i)) + continue; + umad_dev->port[i - s].umad_dev = umad_dev; if (ib_umad_init_port(device, i, umad_dev, &umad_dev->port[i - s])) goto err; + + count++; } + if (!count) + goto free; + ib_set_client_data(device, &umad_client, umad_dev); return; err: - while (--i >= s) - ib_umad_kill_port(&umad_dev->port[i - s]); + while (--i >= s) { + if (!rdma_cap_ib_mad(device, i)) + continue; + ib_umad_kill_port(&umad_dev->port[i - s]); + } +free: kobject_put(&umad_dev->kobj); } -static void ib_umad_remove_one(struct ib_device *device) +static void ib_umad_remove_one(struct ib_device *device, void *client_data) { - struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client); + struct ib_umad_device *umad_dev = client_data; int i; if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) - ib_umad_kill_port(&umad_dev->port[i]); + for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { + if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) + ib_umad_kill_port(&umad_dev->port[i]); + } kobject_put(&umad_dev->kobj); } diff --git a/kernel/drivers/infiniband/core/uverbs.h b/kernel/drivers/infiniband/core/uverbs.h index bebf11a66..94bbd8c15 100644 --- a/kernel/drivers/infiniband/core/uverbs.h +++ b/kernel/drivers/infiniband/core/uverbs.h @@ -89,12 +89,16 @@ struct ib_uverbs_device { int num_comp_vectors; struct completion comp; struct device *dev; - struct ib_device *ib_dev; + struct ib_device __rcu *ib_dev; int devnum; struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; struct kobject kobj; + struct srcu_struct disassociate_srcu; + struct mutex lists_mutex; /* protect lists */ + struct list_head uverbs_file_list; + struct list_head uverbs_events_file_list; }; struct ib_uverbs_event_file { @@ -106,6 +110,7 @@ struct ib_uverbs_event_file { wait_queue_head_t poll_wait; struct fasync_struct *async_queue; struct list_head event_list; + struct list_head list; }; struct ib_uverbs_file { @@ -115,6 +120,8 @@ struct ib_uverbs_file { struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_event_file *async_file; + struct list_head list; + int is_closed; }; struct ib_uverbs_event { @@ -178,7 +185,9 @@ extern struct idr ib_uverbs_rule_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev, int is_async); +void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, @@ -213,6 +222,7 @@ struct ib_uverbs_flow_spec { #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ + struct ib_device *ib_dev, \ const char __user *buf, int in_len, \ int out_len) @@ -254,11 +264,14 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ + struct ib_device *ib_dev, \ struct ib_udata *ucore, \ struct ib_udata *uhw) IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); +IB_UVERBS_DECLARE_EX_CMD(create_cq); +IB_UVERBS_DECLARE_EX_CMD(create_qp); #endif /* UVERBS_H */ diff --git a/kernel/drivers/infiniband/core/uverbs_cmd.c b/kernel/drivers/infiniband/core/uverbs_cmd.c index ccc2494b4..1c02deab0 100644 --- a/kernel/drivers/infiniband/core/uverbs_cmd.c +++ b/kernel/drivers/infiniband/core/uverbs_cmd.c @@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; * The ib_uobject locking scheme is as follows: * * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it - * needs to be held during all idr operations. When an object is + * needs to be held during all idr write operations. When an object is * looked up, a reference must be taken on the object's kref before - * dropping this lock. + * dropping this lock. For read operations, the rcu_read_lock() + * and rcu_write_lock() but similarly the kref reference is grabbed + * before the rcu_read_unlock(). * * - Each object also has an rwsem. This rwsem must be held for * reading while an operation that uses the object is performed. @@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, static void release_uobj(struct kref *kref) { - kfree(container_of(kref, struct ib_uobject, ref)); + kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); } static void put_uobj(struct ib_uobject *uobj) @@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, { struct ib_uobject *uobj; - spin_lock(&ib_uverbs_idr_lock); + rcu_read_lock(); uobj = idr_find(idr, id); if (uobj) { if (uobj->context == context) @@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, else uobj = NULL; } - spin_unlock(&ib_uverbs_idr_lock); + rcu_read_unlock(); return uobj; } @@ -282,13 +284,13 @@ static void put_xrcd_read(struct ib_uobject *uobj) } ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_get_context cmd; struct ib_uverbs_get_context_resp resp; struct ib_udata udata; - struct ib_device *ibdev = file->device->ib_dev; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_device_attr dev_attr; #endif @@ -313,13 +315,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - ucontext = ibdev->alloc_ucontext(ibdev, &udata); + ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto err; } - ucontext->device = ibdev; + ucontext->device = ib_dev; INIT_LIST_HEAD(&ucontext->pd_list); INIT_LIST_HEAD(&ucontext->mr_list); INIT_LIST_HEAD(&ucontext->mw_list); @@ -340,7 +342,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->odp_mrs_count = 0; INIT_LIST_HEAD(&ucontext->no_private_counters); - ret = ib_query_device(ibdev, &dev_attr); + ret = ib_query_device(ib_dev, &dev_attr); if (ret) goto err_free; if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) @@ -355,7 +357,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_free; resp.async_fd = ret; - filp = ib_uverbs_alloc_event_file(file, 1); + filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_fd; @@ -367,16 +369,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_file; } - file->async_file = filp->private_data; - - INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&file->event_handler); - if (ret) - goto err_file; - - kref_get(&file->async_file->ref); - kref_get(&file->ref); file->ucontext = ucontext; fd_install(resp.async_fd, filp); @@ -386,6 +378,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, return in_len; err_file: + ib_uverbs_free_async_event_file(file); fput(filp); err_fd: @@ -393,7 +386,7 @@ err_fd: err_free: put_pid(ucontext->tgid); - ibdev->dealloc_ucontext(ucontext); + ib_dev->dealloc_ucontext(ucontext); err: mutex_unlock(&file->mutex); @@ -401,11 +394,12 @@ err: } static void copy_query_dev_fields(struct ib_uverbs_file *file, + struct ib_device *ib_dev, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { resp->fw_ver = attr->fw_ver; - resp->node_guid = file->device->ib_dev->node_guid; + resp->node_guid = ib_dev->node_guid; resp->sys_image_guid = attr->sys_image_guid; resp->max_mr_size = attr->max_mr_size; resp->page_size_cap = attr->page_size_cap; @@ -443,10 +437,11 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file, resp->max_srq_sge = attr->max_srq_sge; resp->max_pkeys = attr->max_pkeys; resp->local_ca_ack_delay = attr->local_ca_ack_delay; - resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt; + resp->phys_port_cnt = ib_dev->phys_port_cnt; } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -461,12 +456,12 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = ib_query_device(file->device->ib_dev, &attr); + ret = ib_query_device(ib_dev, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); - copy_query_dev_fields(file, &resp, &attr); + copy_query_dev_fields(file, ib_dev, &resp, &attr); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -476,6 +471,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -490,7 +486,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); + ret = ib_query_port(ib_dev, cmd.port_num, &attr); if (ret) return ret; @@ -515,7 +511,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; - resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev, + resp.link_layer = rdma_port_get_link_layer(ib_dev, cmd.port_num); if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -526,6 +522,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, } ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -553,15 +550,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, init_uobj(uobj, 0, file->ucontext, &pd_lock_class); down_write(&uobj->mutex); - pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, - file->ucontext, &udata); + pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; } - pd->device = file->device->ib_dev; + pd->device = ib_dev; pd->uobject = uobj; + pd->local_mr = NULL; atomic_set(&pd->usecnt, 0); uobj->object = pd; @@ -600,11 +597,13 @@ err: } ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; + struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -613,15 +612,20 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; + pd = uobj->object; - ret = ib_dealloc_pd(uobj->object); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + if (atomic_read(&pd->usecnt)) { + ret = -EBUSY; + goto err_put; + } + ret = pd->device->dealloc_pd(uobj->object); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); if (ret) - return ret; + goto err_put; + + uobj->live = 0; + put_uobj_write(uobj); idr_remove_uobj(&ib_uverbs_pd_idr, uobj); @@ -632,6 +636,10 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, put_uobj(uobj); return in_len; + +err_put: + put_uobj_write(uobj); + return ret; } struct xrcd_table_entry { @@ -720,6 +728,7 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, } ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -778,15 +787,14 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, down_write(&obj->uobject.mutex); if (!xrcd) { - xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, - file->ucontext, &udata); + xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; } xrcd->inode = inode; - xrcd->device = file->device->ib_dev; + xrcd->device = ib_dev; atomic_set(&xrcd->usecnt, 0); mutex_init(&xrcd->tgt_qp_mutex); INIT_LIST_HEAD(&xrcd->tgt_qp_list); @@ -857,6 +865,7 @@ err_tree_mutex_unlock: } ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -934,6 +943,7 @@ void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1043,6 +1053,7 @@ err_free: } ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1136,6 +1147,7 @@ put_uobjs: } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1174,8 +1186,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_alloc_mw cmd; struct ib_uverbs_alloc_mw_resp resp; @@ -1256,8 +1269,9 @@ err_free: } ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) { struct ib_uverbs_dealloc_mw cmd; struct ib_mw *mw; @@ -1294,6 +1308,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1313,7 +1328,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return ret; resp.fd = ret; - filp = ib_uverbs_alloc_event_file(file, 0); + filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); if (IS_ERR(filp)) { put_unused_fd(resp.fd); return PTR_ERR(filp); @@ -1330,40 +1345,38 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_cq *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *udata, + void *context), + void *context) { - struct ib_uverbs_create_cq cmd; - struct ib_uverbs_create_cq_resp resp; - struct ib_udata udata; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_cq_init_attr attr = {}; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - if (cmd.comp_vector >= file->device->num_comp_vectors) - return -EINVAL; + if (cmd->comp_vector >= file->device->num_comp_vectors) + return ERR_PTR(-EINVAL); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); + init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); - if (cmd.comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + if (cmd->comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); if (!ev_file) { ret = -EINVAL; goto err; @@ -1376,15 +1389,20 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, - cmd.comp_vector, - file->ucontext, &udata); + attr.cqe = cmd->cqe; + attr.comp_vector = cmd->comp_vector; + + if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) + attr.flags = cmd->flags; + + cq = ib_dev->create_cq(ib_dev, &attr, + file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; } - cq->device = file->device->ib_dev; + cq->device = ib_dev; cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; @@ -1397,14 +1415,15 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, goto err_free; memset(&resp, 0, sizeof resp); - resp.cq_handle = obj->uobject.id; - resp.cqe = cq->cqe; + resp.base.cq_handle = obj->uobject.id; + resp.base.cqe = cq->cqe; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, obj, &resp, ucore, context); + if (ret) + goto err_cb; mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); @@ -1414,9 +1433,9 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, up_write(&obj->uobject.mutex); - return in_len; + return obj; -err_copy: +err_cb: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); err_free: @@ -1428,10 +1447,112 @@ err_file: err: put_uobj_write(&obj->uobject); - return ret; + + return ERR_PTR(ret); +} + +static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_ex_create_cq cmd_ex; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata ucore; + struct ib_udata uhw; + struct ib_ucq_object *obj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); + + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.cqe = cmd.cqe; + cmd_ex.comp_vector = cmd.comp_vector; + cmd_ex.comp_channel = cmd.comp_channel; + + obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), comp_channel) + + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, + NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return in_len; +} + +static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_uverbs_ex_create_cq cmd; + struct ib_ucq_object *obj; + int err; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + obj = create_cq(file, ib_dev, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_cq_cb, NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return 0; } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1495,6 +1616,7 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) } ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1546,6 +1668,7 @@ out_put: } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1568,6 +1691,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -1619,65 +1743,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_qp cmd; - struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; - struct ib_uqp_object *obj; - struct ib_device *device; - struct ib_pd *pd = NULL; - struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); - struct ib_cq *scq = NULL, *rcq = NULL; - struct ib_srq *srq = NULL; - struct ib_qp *qp; - struct ib_qp_init_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; +static int create_qp(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_qp *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *udata), + void *context) +{ + struct ib_uqp_object *obj; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; + struct ib_qp *qp; + char *buf; + struct ib_qp_init_attr attr; + struct ib_uverbs_ex_create_qp_resp resp; + int ret; - if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, + &qp_lock_class); down_write(&obj->uevent.uobject.mutex); - if (cmd.qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (cmd->qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, + &xrcd_uobj); if (!xrcd) { ret = -EINVAL; goto err_put; } device = xrcd->device; } else { - if (cmd.qp_type == IB_QPT_XRC_INI) { - cmd.max_recv_wr = cmd.max_recv_sge = 0; + if (cmd->qp_type == IB_QPT_XRC_INI) { + cmd->max_recv_wr = 0; + cmd->max_recv_sge = 0; } else { - if (cmd.is_srq) { - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (cmd->is_srq) { + srq = idr_read_srq(cmd->srq_handle, + file->ucontext); if (!srq || srq->srq_type != IB_SRQT_BASIC) { ret = -EINVAL; goto err_put; } } - if (cmd.recv_cq_handle != cmd.send_cq_handle) { - rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1685,9 +1809,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } } - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); rcq = rcq ?: scq; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd || !scq) { ret = -EINVAL; goto err_put; @@ -1702,31 +1826,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.recv_cq = rcq; attr.srq = srq; attr.xrcd = xrcd; - attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; - attr.qp_type = cmd.qp_type; + attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : + IB_SIGNAL_REQ_WR; + attr.qp_type = cmd->qp_type; attr.create_flags = 0; - attr.cap.max_send_wr = cmd.max_send_wr; - attr.cap.max_recv_wr = cmd.max_recv_wr; - attr.cap.max_send_sge = cmd.max_send_sge; - attr.cap.max_recv_sge = cmd.max_recv_sge; - attr.cap.max_inline_data = cmd.max_inline_data; + attr.cap.max_send_wr = cmd->max_send_wr; + attr.cap.max_recv_wr = cmd->max_recv_wr; + attr.cap.max_send_sge = cmd->max_send_sge; + attr.cap.max_recv_sge = cmd->max_recv_sge; + attr.cap.max_inline_data = cmd->max_inline_data; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - if (cmd.qp_type == IB_QPT_XRC_TGT) + if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + + sizeof(cmd->create_flags)) + attr.create_flags = cmd->create_flags; + + if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + ret = -EINVAL; + goto err_put; + } + + buf = (void *)cmd + sizeof(*cmd); + if (cmd_sz > sizeof(*cmd)) + if (!(buf[0] == 0 && !memcmp(buf, buf + 1, + cmd_sz - sizeof(*cmd) - 1))) { + ret = -EINVAL; + goto err_put; + } + + if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = device->create_qp(pd, &attr, &udata); + qp = device->create_qp(pd, &attr, uhw); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - if (cmd.qp_type != IB_QPT_XRC_TGT) { + if (cmd->qp_type != IB_QPT_XRC_TGT) { qp->real_qp = qp; qp->device = device; qp->pd = pd; @@ -1752,19 +1894,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_destroy; memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - resp.max_recv_sge = attr.cap.max_recv_sge; - resp.max_send_sge = attr.cap.max_send_sge; - resp.max_recv_wr = attr.cap.max_recv_wr; - resp.max_send_wr = attr.cap.max_send_wr; - resp.max_inline_data = attr.cap.max_inline_data; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } + resp.base.qpn = qp->qp_num; + resp.base.qp_handle = obj->uevent.uobject.id; + resp.base.max_recv_sge = attr.cap.max_recv_sge; + resp.base.max_send_sge = attr.cap.max_send_sge; + resp.base.max_recv_wr = attr.cap.max_recv_wr; + resp.base.max_send_wr = attr.cap.max_send_wr; + resp.base.max_inline_data = attr.cap.max_inline_data; + + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, &resp, ucore); + if (ret) + goto err_cb; if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, @@ -1790,9 +1933,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, up_write(&obj->uevent.uobject.mutex); - return in_len; - -err_copy: + return 0; +err_cb: idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: @@ -1814,7 +1956,115 @@ err_put: return ret; } +static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_ex_create_qp cmd_ex; + struct ib_udata ucore; + struct ib_udata uhw; + ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); + int err; + + if (out_len < resp_size) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), + resp_size); + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + resp_size, + in_len - sizeof(cmd), out_len - resp_size); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.pd_handle = cmd.pd_handle; + cmd_ex.send_cq_handle = cmd.send_cq_handle; + cmd_ex.recv_cq_handle = cmd.recv_cq_handle; + cmd_ex.srq_handle = cmd.srq_handle; + cmd_ex.max_send_wr = cmd.max_send_wr; + cmd_ex.max_recv_wr = cmd.max_recv_wr; + cmd_ex.max_send_sge = cmd.max_send_sge; + cmd_ex.max_recv_sge = cmd.max_recv_sge; + cmd_ex.max_inline_data = cmd.max_inline_data; + cmd_ex.sq_sig_all = cmd.sq_sig_all; + cmd_ex.qp_type = cmd.qp_type; + cmd_ex.is_srq = cmd.is_srq; + + err = create_qp(file, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), is_srq) + + sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, + NULL); + + if (err) + return err; + + return in_len; +} + +static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_qp_resp resp; + struct ib_uverbs_ex_create_qp cmd = {0}; + int err; + + if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + + sizeof(cmd.comp_mask))) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + err = create_qp(file, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_qp_cb, NULL); + + if (err) + return err; + + return 0; +} + ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_qp cmd; @@ -1909,6 +2159,7 @@ err_put: } ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2023,6 +2274,7 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask) } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2095,7 +2347,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; if (qp->real_qp == qp) { - ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); + ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); if (ret) goto release_qp; ret = qp->device->modify_qp(qp, attr, @@ -2119,6 +2371,7 @@ out: } ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2176,7 +2429,14 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, return in_len; } +static void *alloc_wr(size_t wr_size, __u32 num_sge) +{ + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + + num_sge * sizeof (struct ib_sge), GFP_KERNEL); +}; + ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2188,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, int i, sg_ind; int is_ud; ssize_t ret = -EINVAL; + size_t next_size; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -2223,14 +2484,87 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; + if (is_ud) { + struct ib_ud_wr *ud; + + if (user_wr->opcode != IB_WR_SEND && + user_wr->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + + next_size = sizeof(*ud); + ud = alloc_wr(next_size, user_wr->num_sge); + if (!ud) { + ret = -ENOMEM; + goto out_put; + } + + ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + if (!ud->ah) { + kfree(ud); + ret = -EINVAL; + goto out_put; + } + ud->remote_qpn = user_wr->wr.ud.remote_qpn; + ud->remote_qkey = user_wr->wr.ud.remote_qkey; + + next = &ud->wr; + } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE || + user_wr->opcode == IB_WR_RDMA_READ) { + struct ib_rdma_wr *rdma; + + next_size = sizeof(*rdma); + rdma = alloc_wr(next_size, user_wr->num_sge); + if (!rdma) { + ret = -ENOMEM; + goto out_put; + } + + rdma->remote_addr = user_wr->wr.rdma.remote_addr; + rdma->rkey = user_wr->wr.rdma.rkey; + + next = &rdma->wr; + } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + struct ib_atomic_wr *atomic; + + next_size = sizeof(*atomic); + atomic = alloc_wr(next_size, user_wr->num_sge); + if (!atomic) { + ret = -ENOMEM; + goto out_put; + } + + atomic->remote_addr = user_wr->wr.atomic.remote_addr; + atomic->compare_add = user_wr->wr.atomic.compare_add; + atomic->swap = user_wr->wr.atomic.swap; + atomic->rkey = user_wr->wr.atomic.rkey; + + next = &atomic->wr; + } else if (user_wr->opcode == IB_WR_SEND || + user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_SEND_WITH_INV) { + next_size = sizeof(*next); + next = alloc_wr(next_size, user_wr->num_sge); + if (!next) { + ret = -ENOMEM; + goto out_put; + } + } else { + ret = -EINVAL; goto out_put; } + if (user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + next->ex.imm_data = + (__be32 __force) user_wr->ex.imm_data; + } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { + next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; + } + if (!last) wr = next; else @@ -2243,63 +2577,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; - if (is_ud) { - if (next->opcode != IB_WR_SEND && - next->opcode != IB_WR_SEND_WITH_IMM) { - ret = -EINVAL; - goto out_put; - } - - next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, - file->ucontext); - if (!next->wr.ud.ah) { - ret = -EINVAL; - goto out_put; - } - next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; - next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; - if (next->opcode == IB_WR_SEND_WITH_IMM) - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - } else { - switch (next->opcode) { - case IB_WR_RDMA_WRITE_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_READ: - next->wr.rdma.remote_addr = - user_wr->wr.rdma.remote_addr; - next->wr.rdma.rkey = - user_wr->wr.rdma.rkey; - break; - case IB_WR_SEND_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - break; - case IB_WR_SEND_WITH_INV: - next->ex.invalidate_rkey = - user_wr->ex.invalidate_rkey; - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - next->wr.atomic.remote_addr = - user_wr->wr.atomic.remote_addr; - next->wr.atomic.compare_add = - user_wr->wr.atomic.compare_add; - next->wr.atomic.swap = user_wr->wr.atomic.swap; - next->wr.atomic.rkey = user_wr->wr.atomic.rkey; - case IB_WR_SEND: - break; - default: - ret = -EINVAL; - goto out_put; - } - } - if (next->num_sge) { next->sg_list = (void *) next + - ALIGN(sizeof *next, sizeof (struct ib_sge)); + ALIGN(next_size, sizeof(struct ib_sge)); if (copy_from_user(next->sg_list, buf + sizeof cmd + cmd.wr_count * cmd.wqe_size + @@ -2330,8 +2610,8 @@ out_put: put_qp_read(qp); while (wr) { - if (is_ud && wr->wr.ud.ah) - put_ah_read(wr->wr.ud.ah); + if (is_ud && ud_wr(wr)->ah) + put_ah_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; @@ -2429,6 +2709,7 @@ err: } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2478,6 +2759,7 @@ out: } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2527,6 +2809,7 @@ out: } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2567,7 +2850,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, attr.grh.sgid_index = cmd.attr.grh.sgid_index; attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; - attr.vlan_id = 0; memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); @@ -2619,6 +2901,7 @@ err: } ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; @@ -2655,6 +2938,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2702,6 +2986,7 @@ out_put: } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -2782,6 +3067,7 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, + struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -2942,6 +3228,7 @@ err_free_attr: } int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, + struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { @@ -2984,6 +3271,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3117,6 +3405,7 @@ err: } ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3144,7 +3433,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - ret = __uverbs_create_xsrq(file, &xcmd, &udata); + ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); if (ret) return ret; @@ -3152,6 +3441,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xsrq cmd; @@ -3169,7 +3459,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - ret = __uverbs_create_xsrq(file, &cmd, &udata); + ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); if (ret) return ret; @@ -3177,6 +3467,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3207,6 +3498,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3247,6 +3539,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { @@ -3304,16 +3597,15 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, + struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_query_device_resp resp; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr; - struct ib_device *device; int err; - device = file->device->ib_dev; if (ucore->inlen < sizeof(cmd)) return -EINVAL; @@ -3332,11 +3624,13 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (ucore->outlen < resp.response_length) return -ENOSPC; - err = device->query_device(device, &attr); + memset(&attr, 0, sizeof(attr)); + + err = ib_dev->query_device(ib_dev, &attr, uhw); if (err) return err; - copy_query_dev_fields(file, &resp.base, &attr); + copy_query_dev_fields(file, ib_dev, &resp.base, &attr); resp.comp_mask = 0; if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) @@ -3356,6 +3650,18 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, #endif resp.response_length += sizeof(resp.odp_caps); + if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) + goto end; + + resp.timestamp_mask = attr.timestamp_mask; + resp.response_length += sizeof(resp.timestamp_mask); + + if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) + goto end; + + resp.hca_core_clock = attr.hca_core_clock; + resp.response_length += sizeof(resp.hca_core_clock); + end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); if (err) diff --git a/kernel/drivers/infiniband/core/uverbs_main.c b/kernel/drivers/infiniband/core/uverbs_main.c index 09686d49d..e3ef28861 100644 --- a/kernel/drivers/infiniband/core/uverbs_main.c +++ b/kernel/drivers/infiniband/core/uverbs_main.c @@ -79,6 +79,7 @@ static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, + struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, @@ -119,21 +120,25 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, + struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, + [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, + [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, }; static void ib_uverbs_add_one(struct ib_device *device); -static void ib_uverbs_remove_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = container_of(kobj, struct ib_uverbs_device, kobj); + cleanup_srcu_struct(&dev->disassociate_srcu); kfree(dev); } @@ -204,9 +209,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, { struct ib_uobject *uobj, *tmp; - if (!context) - return 0; - context->closing = 1; list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { @@ -315,8 +317,16 @@ static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); + struct ib_device *ib_dev; + int srcu_key; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (ib_dev && !ib_dev->disassociate_ucontext) + module_put(ib_dev->owner); + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - module_put(file->device->ib_dev->owner); if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); @@ -340,9 +350,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list))) + (!list_empty(&file->event_list) || + /* The barriers built into wait_event_interruptible() + * and wake_up() guarentee this will see the null set + * without using RCU + */ + !file->uverbs_file->device->ib_dev))) return -ERESTARTSYS; + /* If device was disassociated and no event exists set an error */ + if (list_empty(&file->event_list) && + !file->uverbs_file->device->ib_dev) + return -EIO; + spin_lock_irq(&file->lock); } @@ -405,8 +425,11 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; + int closed_already = 0; + mutex_lock(&file->uverbs_file->device->lists_mutex); spin_lock_irq(&file->lock); + closed_already = file->is_closed; file->is_closed = 1; list_for_each_entry_safe(entry, tmp, &file->event_list, list) { if (entry->counter) @@ -414,11 +437,15 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) kfree(entry); } spin_unlock_irq(&file->lock); - - if (file->is_async) { - ib_unregister_event_handler(&file->uverbs_file->event_handler); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + if (!closed_already) { + list_del(&file->list); + if (file->is_async) + ib_unregister_event_handler(&file->uverbs_file-> + event_handler); } + mutex_unlock(&file->uverbs_file->device->lists_mutex); + + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); kref_put(&file->ref, ib_uverbs_release_event_file); return 0; @@ -550,13 +577,21 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } +void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) +{ + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + file->async_file = NULL; +} + struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev, int is_async) { struct ib_uverbs_event_file *ev_file; struct file *filp; + int ret; - ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); @@ -565,16 +600,47 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; + kref_get(&ev_file->uverbs_file->ref); ev_file->async_queue = NULL; - ev_file->is_async = is_async; ev_file->is_closed = 0; filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops, ev_file, O_RDONLY); if (IS_ERR(filp)) - kfree(ev_file); + goto err_put_refs; + + mutex_lock(&uverbs_file->device->lists_mutex); + list_add_tail(&ev_file->list, + &uverbs_file->device->uverbs_events_file_list); + mutex_unlock(&uverbs_file->device->lists_mutex); + + if (is_async) { + WARN_ON(uverbs_file->async_file); + uverbs_file->async_file = ev_file; + kref_get(&uverbs_file->async_file->ref); + INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, + ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&uverbs_file->event_handler); + if (ret) + goto err_put_file; + + /* At that point async file stuff was fully set */ + ev_file->is_async = 1; + } return filp; + +err_put_file: + fput(filp); + kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); + uverbs_file->async_file = NULL; + return ERR_PTR(ret); + +err_put_refs: + kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); + kref_put(&ev_file->ref, ib_uverbs_release_event_file); + return filp; } /* @@ -610,8 +676,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; + struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; __u32 flags; + int srcu_key; + ssize_t ret; if (count < sizeof hdr) return -EINVAL; @@ -619,6 +688,14 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto out; + } + flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; @@ -626,26 +703,36 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, __u32 command; if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) - return -EINVAL; + IB_USER_VERBS_CMD_COMMAND_MASK)) { + ret = -EINVAL; + goto out; + } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; if (command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[command]) - return -EINVAL; + !uverbs_cmd_table[command]) { + ret = -EINVAL; + goto out; + } if (!file->ucontext && - command != IB_USER_VERBS_CMD_GET_CONTEXT) - return -EINVAL; + command != IB_USER_VERBS_CMD_GET_CONTEXT) { + ret = -EINVAL; + goto out; + } - if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command))) - return -ENOSYS; + if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) { + ret = -ENOSYS; + goto out; + } - if (hdr.in_words * 4 != count) - return -EINVAL; + if (hdr.in_words * 4 != count) { + ret = -EINVAL; + goto out; + } - return uverbs_cmd_table[command](file, + ret = uverbs_cmd_table[command](file, ib_dev, buf + sizeof(hdr), hdr.in_words * 4, hdr.out_words * 4); @@ -656,51 +743,72 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_udata ucore; struct ib_udata uhw; - int err; size_t written_count = count; if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) - return -EINVAL; + IB_USER_VERBS_CMD_COMMAND_MASK)) { + ret = -EINVAL; + goto out; + } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || - !uverbs_ex_cmd_table[command]) - return -ENOSYS; + !uverbs_ex_cmd_table[command]) { + ret = -ENOSYS; + goto out; + } - if (!file->ucontext) - return -EINVAL; + if (!file->ucontext) { + ret = -EINVAL; + goto out; + } - if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command))) - return -ENOSYS; + if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) { + ret = -ENOSYS; + goto out; + } - if (count < (sizeof(hdr) + sizeof(ex_hdr))) - return -EINVAL; + if (count < (sizeof(hdr) + sizeof(ex_hdr))) { + ret = -EINVAL; + goto out; + } - if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) - return -EFAULT; + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { + ret = -EFAULT; + goto out; + } count -= sizeof(hdr) + sizeof(ex_hdr); buf += sizeof(hdr) + sizeof(ex_hdr); - if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) - return -EINVAL; + if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { + ret = -EINVAL; + goto out; + } - if (ex_hdr.cmd_hdr_reserved) - return -EINVAL; + if (ex_hdr.cmd_hdr_reserved) { + ret = -EINVAL; + goto out; + } if (ex_hdr.response) { - if (!hdr.out_words && !ex_hdr.provider_out_words) - return -EINVAL; + if (!hdr.out_words && !ex_hdr.provider_out_words) { + ret = -EINVAL; + goto out; + } if (!access_ok(VERIFY_WRITE, (void __user *) (unsigned long) ex_hdr.response, - (hdr.out_words + ex_hdr.provider_out_words) * 8)) - return -EFAULT; + (hdr.out_words + ex_hdr.provider_out_words) * 8)) { + ret = -EFAULT; + goto out; + } } else { - if (hdr.out_words || ex_hdr.provider_out_words) - return -EINVAL; + if (hdr.out_words || ex_hdr.provider_out_words) { + ret = -EINVAL; + goto out; + } } INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, @@ -712,27 +820,43 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); - err = uverbs_ex_cmd_table[command](file, + ret = uverbs_ex_cmd_table[command](file, + ib_dev, &ucore, &uhw); - - if (err) - return err; - - return written_count; + if (!ret) + ret = written_count; + } else { + ret = -ENOSYS; } - return -ENOSYS; +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return ret; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; + struct ib_device *ib_dev; + int ret = 0; + int srcu_key; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto out; + } if (!file->ucontext) - return -ENODEV; + ret = -ENODEV; else - return file->device->ib_dev->mmap(file->ucontext, vma); + ret = ib_dev->mmap(file->ucontext, vma); +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return ret; } /* @@ -749,21 +873,43 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; + struct ib_device *ib_dev; int ret; + int module_dependent; + int srcu_key; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; - if (!try_module_get(dev->ib_dev->owner)) { - ret = -ENODEV; + srcu_key = srcu_read_lock(&dev->disassociate_srcu); + mutex_lock(&dev->lists_mutex); + ib_dev = srcu_dereference(dev->ib_dev, + &dev->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; goto err; } - file = kmalloc(sizeof *file, GFP_KERNEL); + /* In case IB device supports disassociate ucontext, there is no hard + * dependency between uverbs device and its low level device. + */ + module_dependent = !(ib_dev->disassociate_ucontext); + + if (module_dependent) { + if (!try_module_get(ib_dev->owner)) { + ret = -ENODEV; + goto err; + } + } + + file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; - goto err_module; + if (module_dependent) + goto err_module; + + goto err; } file->device = dev; @@ -774,13 +920,18 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) filp->private_data = file; kobject_get(&dev->kobj); + list_add_tail(&file->list, &dev->uverbs_file_list); + mutex_unlock(&dev->lists_mutex); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return nonseekable_open(inode, filp); err_module: - module_put(dev->ib_dev->owner); + module_put(ib_dev->owner); err: + mutex_unlock(&dev->lists_mutex); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); @@ -791,8 +942,18 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_device *dev = file->device; - - ib_uverbs_cleanup_ucontext(file, file->ucontext); + struct ib_ucontext *ucontext = NULL; + + mutex_lock(&file->device->lists_mutex); + ucontext = file->ucontext; + file->ucontext = NULL; + if (!file->is_closed) { + list_del(&file->list); + file->is_closed = 1; + } + mutex_unlock(&file->device->lists_mutex); + if (ucontext) + ib_uverbs_cleanup_ucontext(file, ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); @@ -829,12 +990,21 @@ static struct ib_client uverbs_client = { static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, char *buf) { + int ret = -ENODEV; + int srcu_key; struct ib_uverbs_device *dev = dev_get_drvdata(device); + struct ib_device *ib_dev; if (!dev) return -ENODEV; - return sprintf(buf, "%s\n", dev->ib_dev->name); + srcu_key = srcu_read_lock(&dev->disassociate_srcu); + ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); + if (ib_dev) + ret = sprintf(buf, "%s\n", ib_dev->name); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + + return ret; } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -842,11 +1012,19 @@ static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); + int ret = -ENODEV; + int srcu_key; + struct ib_device *ib_dev; if (!dev) return -ENODEV; + srcu_key = srcu_read_lock(&dev->disassociate_srcu); + ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); + if (ib_dev) + ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); + return ret; } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); @@ -886,6 +1064,7 @@ static void ib_uverbs_add_one(struct ib_device *device) int devnum; dev_t base; struct ib_uverbs_device *uverbs_dev; + int ret; if (!device->alloc_ucontext) return; @@ -894,11 +1073,20 @@ static void ib_uverbs_add_one(struct ib_device *device) if (!uverbs_dev) return; + ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); + if (ret) { + kfree(uverbs_dev); + return; + } + atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); + mutex_init(&uverbs_dev->lists_mutex); + INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); + INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -919,7 +1107,7 @@ static void ib_uverbs_add_one(struct ib_device *device) } spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; + rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; cdev_init(&uverbs_dev->cdev, NULL); @@ -963,9 +1151,72 @@ err: return; } -static void ib_uverbs_remove_one(struct ib_device *device) +static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, + struct ib_device *ib_dev) { - struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + struct ib_uverbs_file *file; + struct ib_uverbs_event_file *event_file; + struct ib_event event; + + /* Pending running commands to terminate */ + synchronize_srcu(&uverbs_dev->disassociate_srcu); + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + event.device = ib_dev; + + mutex_lock(&uverbs_dev->lists_mutex); + while (!list_empty(&uverbs_dev->uverbs_file_list)) { + struct ib_ucontext *ucontext; + + file = list_first_entry(&uverbs_dev->uverbs_file_list, + struct ib_uverbs_file, list); + file->is_closed = 1; + ucontext = file->ucontext; + list_del(&file->list); + file->ucontext = NULL; + kref_get(&file->ref); + mutex_unlock(&uverbs_dev->lists_mutex); + /* We must release the mutex before going ahead and calling + * disassociate_ucontext. disassociate_ucontext might end up + * indirectly calling uverbs_close, for example due to freeing + * the resources (e.g mmput). + */ + ib_uverbs_event_handler(&file->event_handler, &event); + if (ucontext) { + ib_dev->disassociate_ucontext(ucontext); + ib_uverbs_cleanup_ucontext(file, ucontext); + } + + mutex_lock(&uverbs_dev->lists_mutex); + kref_put(&file->ref, ib_uverbs_release_file); + } + + while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { + event_file = list_first_entry(&uverbs_dev-> + uverbs_events_file_list, + struct ib_uverbs_event_file, + list); + spin_lock_irq(&event_file->lock); + event_file->is_closed = 1; + spin_unlock_irq(&event_file->lock); + + list_del(&event_file->list); + if (event_file->is_async) { + ib_unregister_event_handler(&event_file->uverbs_file-> + event_handler); + event_file->uverbs_file->event_handler.device = NULL; + } + + wake_up_interruptible(&event_file->poll_wait); + kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); + } + mutex_unlock(&uverbs_dev->lists_mutex); +} + +static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) +{ + struct ib_uverbs_device *uverbs_dev = client_data; + int wait_clients = 1; if (!uverbs_dev) return; @@ -979,9 +1230,27 @@ static void ib_uverbs_remove_one(struct ib_device *device) else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); + if (device->disassociate_ucontext) { + /* We disassociate HW resources and immediately return. + * Userspace will see a EIO errno for all future access. + * Upon returning, ib_device may be freed internally and is not + * valid any more. + * uverbs_device is still available until all clients close + * their files, then the uverbs device ref count will be zero + * and its resources will be freed. + * Note: At this point no more files can be opened since the + * cdev was deleted, however active clients can still issue + * commands and close their open files. + */ + rcu_assign_pointer(uverbs_dev->ib_dev, NULL); + ib_uverbs_free_hw_resources(uverbs_dev, device); + wait_clients = 0; + } + if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); - wait_for_completion(&uverbs_dev->comp); + if (wait_clients) + wait_for_completion(&uverbs_dev->comp); kobject_put(&uverbs_dev->kobj); } diff --git a/kernel/drivers/infiniband/core/uverbs_marshall.c b/kernel/drivers/infiniband/core/uverbs_marshall.c index abd972474..7d2f14c9b 100644 --- a/kernel/drivers/infiniband/core/uverbs_marshall.c +++ b/kernel/drivers/infiniband/core/uverbs_marshall.c @@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; - memset(dst->smac, 0, sizeof(dst->smac)); memset(dst->dmac, 0, sizeof(dst->dmac)); - dst->vlan_id = 0xffff; + dst->net = NULL; + dst->ifindex = 0; } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/kernel/drivers/infiniband/core/verbs.c b/kernel/drivers/infiniband/core/verbs.c index f93eb8da7..545906dec 100644 --- a/kernel/drivers/infiniband/core/verbs.c +++ b/kernel/drivers/infiniband/core/verbs.c @@ -41,6 +41,9 @@ #include <linux/export.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -48,6 +51,71 @@ #include "core_priv.h" +static const char * const ib_events[] = { + [IB_EVENT_CQ_ERR] = "CQ error", + [IB_EVENT_QP_FATAL] = "QP fatal error", + [IB_EVENT_QP_REQ_ERR] = "QP request error", + [IB_EVENT_QP_ACCESS_ERR] = "QP access error", + [IB_EVENT_COMM_EST] = "communication established", + [IB_EVENT_SQ_DRAINED] = "send queue drained", + [IB_EVENT_PATH_MIG] = "path migration successful", + [IB_EVENT_PATH_MIG_ERR] = "path migration error", + [IB_EVENT_DEVICE_FATAL] = "device fatal error", + [IB_EVENT_PORT_ACTIVE] = "port active", + [IB_EVENT_PORT_ERR] = "port error", + [IB_EVENT_LID_CHANGE] = "LID change", + [IB_EVENT_PKEY_CHANGE] = "P_key change", + [IB_EVENT_SM_CHANGE] = "SM change", + [IB_EVENT_SRQ_ERR] = "SRQ error", + [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", + [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", + [IB_EVENT_CLIENT_REREGISTER] = "client reregister", + [IB_EVENT_GID_CHANGE] = "GID changed", +}; + +const char *__attribute_const__ ib_event_msg(enum ib_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? + ib_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(ib_event_msg); + +static const char * const wc_statuses[] = { + [IB_WC_SUCCESS] = "success", + [IB_WC_LOC_LEN_ERR] = "local length error", + [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", + [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", + [IB_WC_LOC_PROT_ERR] = "local protection error", + [IB_WC_WR_FLUSH_ERR] = "WR flushed", + [IB_WC_MW_BIND_ERR] = "memory management operation error", + [IB_WC_BAD_RESP_ERR] = "bad response error", + [IB_WC_LOC_ACCESS_ERR] = "local access error", + [IB_WC_REM_INV_REQ_ERR] = "invalid request error", + [IB_WC_REM_ACCESS_ERR] = "remote access error", + [IB_WC_REM_OP_ERR] = "remote operation error", + [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", + [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", + [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", + [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", + [IB_WC_REM_ABORT_ERR] = "operation aborted", + [IB_WC_INV_EECN_ERR] = "invalid EE context number", + [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", + [IB_WC_FATAL_ERR] = "fatal error", + [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", + [IB_WC_GENERAL_ERR] = "general error", +}; + +const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) +{ + size_t index = status; + + return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? + wc_statuses[index] : "unrecognized status"; +} +EXPORT_SYMBOL(ib_wc_status_msg); + __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { @@ -148,28 +216,79 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ struct ib_pd *ib_alloc_pd(struct ib_device *device) { struct ib_pd *pd; + struct ib_device_attr devattr; + int rc; + + rc = ib_query_device(device, &devattr); + if (rc) + return ERR_PTR(rc); pd = device->alloc_pd(device, NULL, NULL); + if (IS_ERR(pd)) + return pd; + + pd->device = device; + pd->uobject = NULL; + pd->local_mr = NULL; + atomic_set(&pd->usecnt, 0); + + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + pd->local_dma_lkey = device->local_dma_lkey; + else { + struct ib_mr *mr; + + mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mr)) { + ib_dealloc_pd(pd); + return (struct ib_pd *)mr; + } - if (!IS_ERR(pd)) { - pd->device = device; - pd->uobject = NULL; - atomic_set(&pd->usecnt, 0); + pd->local_mr = mr; + pd->local_dma_lkey = pd->local_mr->lkey; } - return pd; } EXPORT_SYMBOL(ib_alloc_pd); -int ib_dealloc_pd(struct ib_pd *pd) +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + * + * It is an error to call this function while any resources in the pd still + * exist. The caller is responsible to synchronously destroy them and + * guarantee no new allocations will happen. + */ +void ib_dealloc_pd(struct ib_pd *pd) { - if (atomic_read(&pd->usecnt)) - return -EBUSY; + int ret; - return pd->device->dealloc_pd(pd); + if (pd->local_mr) { + ret = ib_dereg_mr(pd->local_mr); + WARN_ON(ret); + pd->local_mr = NULL; + } + + /* uverbs manipulates usecnt with proper locking, while the kabi + requires the caller to guarantee we can't race here. */ + WARN_ON(atomic_read(&pd->usecnt)); + + /* Making delalloc_pd a void return is a WIP, no driver should return + an error here. */ + ret = pd->device->dealloc_pd(pd); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); @@ -192,32 +311,69 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, - struct ib_grh *grh, struct ib_ah_attr *ah_attr) +struct find_gid_index_context { + u16 vlan_id; +}; + +static bool find_gid_index(const union ib_gid *gid, + const struct ib_gid_attr *gid_attr, + void *context) +{ + struct find_gid_index_context *ctx = + (struct find_gid_index_context *)context; + + if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || + (is_vlan_dev(gid_attr->ndev) && + vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + return false; + + return true; +} + +static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + u16 *gid_index) +{ + struct find_gid_index_context context = {.vlan_id = vlan_id}; + + return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context, gid_index); +} + +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, + const struct ib_wc *wc, const struct ib_grh *grh, + struct ib_ah_attr *ah_attr) { u32 flow_class; u16 gid_index; int ret; - int is_eth = (rdma_port_get_link_layer(device, port_num) == - IB_LINK_LAYER_ETHERNET); memset(ah_attr, 0, sizeof *ah_attr); - if (is_eth) { + if (rdma_cap_eth_ah(device, port_num)) { + u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? + wc->vlan_id : 0xffff; + if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - if (wc->wc_flags & IB_WC_WITH_SMAC && - wc->wc_flags & IB_WC_WITH_VLAN) { - memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); - ah_attr->vlan_id = wc->vlan_id; - } else { + if (!(wc->wc_flags & IB_WC_WITH_SMAC) || + !(wc->wc_flags & IB_WC_WITH_VLAN)) { ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, - ah_attr->dmac, &ah_attr->vlan_id); + ah_attr->dmac, + wc->wc_flags & IB_WC_WITH_VLAN ? + NULL : &vlan_id, + 0); if (ret) return ret; } - } else { - ah_attr->vlan_id = 0xffff; + + ret = get_sgid_index_from_eth(device, port_num, vlan_id, + &grh->dgid, &gid_index); + if (ret) + return ret; + + if (wc->wc_flags & IB_WC_WITH_SMAC) + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); } ah_attr->dlid = wc->slid; @@ -229,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = grh->sgid; - ret = ib_find_cached_gid(device, &grh->dgid, &port_num, - &gid_index); - if (ret) - return ret; + if (!rdma_cap_eth_ah(device, port_num)) { + ret = ib_find_cached_gid_by_port(device, &grh->dgid, + port_num, NULL, + &gid_index); + if (ret) + return ret; + } ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); @@ -244,8 +403,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, } EXPORT_SYMBOL(ib_init_ah_from_wc); -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, - struct ib_grh *grh, u8 port_num) +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, + const struct ib_grh *grh, u8 port_num) { struct ib_ah_attr ah_attr; int ret; @@ -502,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_MAX]; - enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX]; enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; - enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -585,12 +742,6 @@ static const struct { IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, - .req_param_add_eth = { - [IB_QPT_RC] = (IB_QP_SMAC), - [IB_QPT_UC] = (IB_QP_SMAC), - [IB_QPT_XRC_INI] = (IB_QP_SMAC), - [IB_QPT_XRC_TGT] = (IB_QP_SMAC) - }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), @@ -611,21 +762,7 @@ static const struct { [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), }, - .opt_param_add_eth = { - [IB_QPT_RC] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_UC] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID) - } - } + }, }, [IB_QPS_RTR] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -847,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; - if (ll == IB_LINK_LAYER_ETHERNET) { - req_param |= qp_state_table[cur_state][next_state]. - req_param_add_eth[type]; - opt_param |= qp_state_table[cur_state][next_state]. - opt_param_add_eth[type]; - } - if ((mask & req_param) != req_param) return 0; @@ -864,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_l2_attrs(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) +int ib_resolve_eth_dmac(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) { int ret = 0; - union ib_gid sgid; - if ((*qp_attr_mask & IB_QP_AV) && - (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { - ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, &sgid); - if (ret) - goto out; + if (*qp_attr_mask & IB_QP_AV) { + if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || + qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) + return -EINVAL; + + if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) + return 0; + if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); - rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); - if (!(*qp_attr_mask & IB_QP_VID)) - qp_attr->vlan_id = rdma_get_vlan_id(&sgid); + rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, + qp_attr->ah_attr.dmac); } else { - ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, - qp_attr->ah_attr.dmac, &qp_attr->vlan_id); - if (ret) - goto out; - ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); - if (ret) + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + int ifindex; + + ret = ib_query_gid(qp->device, + qp_attr->ah_attr.port_num, + qp_attr->ah_attr.grh.sgid_index, + &sgid, &sgid_attr); + + if (ret || !sgid_attr.ndev) { + if (!ret) + ret = -ENXIO; goto out; + } + + ifindex = sgid_attr.ndev->ifindex; + + ret = rdma_addr_find_dmac_by_grh(&sgid, + &qp_attr->ah_attr.grh.dgid, + qp_attr->ah_attr.dmac, + NULL, ifindex); + + dev_put(sgid_attr.ndev); } - *qp_attr_mask |= IB_QP_SMAC; - if (qp_attr->vlan_id < 0xFFFF) - *qp_attr_mask |= IB_QP_VID; } out: return ret; } -EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); +EXPORT_SYMBOL(ib_resolve_eth_dmac); int ib_modify_qp(struct ib_qp *qp, @@ -906,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp, { int ret; - ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); + ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); if (ret) return ret; @@ -1012,11 +1154,12 @@ EXPORT_SYMBOL(ib_destroy_qp); struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe, int comp_vector) + void *cq_context, + const struct ib_cq_init_attr *cq_attr) { struct ib_cq *cq; - cq = device->create_cq(device, cqe, comp_vector, NULL, NULL); + cq = device->create_cq(device, cq_attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; @@ -1079,73 +1222,6 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) } EXPORT_SYMBOL(ib_get_dma_mr); -struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - struct ib_mr *mr; - int err; - - err = ib_check_mr_access(mr_access_flags); - if (err) - return ERR_PTR(err); - - if (!pd->device->reg_phys_mr) - return ERR_PTR(-ENOSYS); - - mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf, - mr_access_flags, iova_start); - - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - atomic_set(&mr->usecnt, 0); - } - - return mr; -} -EXPORT_SYMBOL(ib_reg_phys_mr); - -int ib_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - struct ib_pd *old_pd; - int ret; - - ret = ib_check_mr_access(mr_access_flags); - if (ret) - return ret; - - if (!mr->device->rereg_phys_mr) - return -ENOSYS; - - if (atomic_read(&mr->usecnt)) - return -EBUSY; - - old_pd = mr->pd; - - ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd, - phys_buf_array, num_phys_buf, - mr_access_flags, iova_start); - - if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) { - atomic_dec(&old_pd->usecnt); - atomic_inc(&pd->usecnt); - } - - return ret; -} -EXPORT_SYMBOL(ib_rereg_phys_mr); - int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) { return mr->device->query_mr ? @@ -1170,54 +1246,28 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); -struct ib_mr *ib_create_mr(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr) -{ - struct ib_mr *mr; - - if (!pd->device->create_mr) - return ERR_PTR(-ENOSYS); - - mr = pd->device->create_mr(pd, mr_init_attr); - - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - atomic_set(&mr->usecnt, 0); - } - - return mr; -} -EXPORT_SYMBOL(ib_create_mr); - -int ib_destroy_mr(struct ib_mr *mr) -{ - struct ib_pd *pd; - int ret; - - if (atomic_read(&mr->usecnt)) - return -EBUSY; - - pd = mr->pd; - ret = mr->device->destroy_mr(mr); - if (!ret) - atomic_dec(&pd->usecnt); - - return ret; -} -EXPORT_SYMBOL(ib_destroy_mr); - -struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) +/** + * ib_alloc_mr() - Allocates a memory region + * @pd: protection domain associated with the region + * @mr_type: memory region type + * @max_num_sg: maximum sg entries available for registration. + * + * Notes: + * Memory registeration page/sg lists must not exceed max_num_sg. + * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed + * max_num_sg * used_page_size. + * + */ +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct ib_mr *mr; - if (!pd->device->alloc_fast_reg_mr) + if (!pd->device->alloc_mr) return ERR_PTR(-ENOSYS); - mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len); - + mr = pd->device->alloc_mr(pd, mr_type, max_num_sg); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; @@ -1228,32 +1278,7 @@ struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) return mr; } -EXPORT_SYMBOL(ib_alloc_fast_reg_mr); - -struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device, - int max_page_list_len) -{ - struct ib_fast_reg_page_list *page_list; - - if (!device->alloc_fast_reg_page_list) - return ERR_PTR(-ENOSYS); - - page_list = device->alloc_fast_reg_page_list(device, max_page_list_len); - - if (!IS_ERR(page_list)) { - page_list->device = device; - page_list->max_page_list_len = max_page_list_len; - } - - return page_list; -} -EXPORT_SYMBOL(ib_alloc_fast_reg_page_list); - -void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - page_list->device->free_fast_reg_page_list(page_list); -} -EXPORT_SYMBOL(ib_free_fast_reg_page_list); +EXPORT_SYMBOL(ib_alloc_mr); /* Memory windows */ @@ -1446,3 +1471,111 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; } EXPORT_SYMBOL(ib_check_mr_status); + +/** + * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list + * and set it the memory region. + * @mr: memory region + * @sg: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @page_size: page vector desired page size + * + * Constraints: + * - The first sg element is allowed to have an offset. + * - Each sg element must be aligned to page_size (or physically + * contiguous to the previous element). In case an sg element has a + * non contiguous offset, the mapping prefix will not include it. + * - The last sg element is allowed to have length less than page_size. + * - If sg_nents total byte length exceeds the mr max_num_sge * page_size + * then only max_num_sg entries will be mapped. + * + * Returns the number of sg elements that were mapped to the memory region. + * + * After this completes successfully, the memory region + * is ready for registration. + */ +int ib_map_mr_sg(struct ib_mr *mr, + struct scatterlist *sg, + int sg_nents, + unsigned int page_size) +{ + if (unlikely(!mr->device->map_mr_sg)) + return -ENOSYS; + + mr->page_size = page_size; + + return mr->device->map_mr_sg(mr, sg, sg_nents); +} +EXPORT_SYMBOL(ib_map_mr_sg); + +/** + * ib_sg_to_pages() - Convert the largest prefix of a sg list + * to a page vector + * @mr: memory region + * @sgl: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @set_page: driver page assignment function pointer + * + * Core service helper for drivers to convert the largest + * prefix of given sg list to a page vector. The sg list + * prefix converted is the prefix that meet the requirements + * of ib_map_mr_sg. + * + * Returns the number of sg elements that were assigned to + * a page vector. + */ +int ib_sg_to_pages(struct ib_mr *mr, + struct scatterlist *sgl, + int sg_nents, + int (*set_page)(struct ib_mr *, u64)) +{ + struct scatterlist *sg; + u64 last_end_dma_addr = 0, last_page_addr = 0; + unsigned int last_page_off = 0; + u64 page_mask = ~((u64)mr->page_size - 1); + int i, ret; + + mr->iova = sg_dma_address(&sgl[0]); + mr->length = 0; + + for_each_sg(sgl, sg, sg_nents, i) { + u64 dma_addr = sg_dma_address(sg); + unsigned int dma_len = sg_dma_len(sg); + u64 end_dma_addr = dma_addr + dma_len; + u64 page_addr = dma_addr & page_mask; + + /* + * For the second and later elements, check whether either the + * end of element i-1 or the start of element i is not aligned + * on a page boundary. + */ + if (i && (last_page_off != 0 || page_addr != dma_addr)) { + /* Stop mapping if there is a gap. */ + if (last_end_dma_addr != dma_addr) + break; + + /* + * Coalesce this element with the last. If it is small + * enough just update mr->length. Otherwise start + * mapping from the next page. + */ + goto next_page; + } + + do { + ret = set_page(mr, page_addr); + if (unlikely(ret < 0)) + return i ? : ret; +next_page: + page_addr += mr->page_size; + } while (page_addr < end_dma_addr); + + mr->length += dma_len; + last_end_dma_addr = end_dma_addr; + last_page_addr = end_dma_addr & page_mask; + last_page_off = end_dma_addr & ~page_mask; + } + + return i; +} +EXPORT_SYMBOL(ib_sg_to_pages); diff --git a/kernel/drivers/infiniband/hw/Makefile b/kernel/drivers/infiniband/hw/Makefile index e900b0353..aded2a5cc 100644 --- a/kernel/drivers/infiniband/hw/Makefile +++ b/kernel/drivers/infiniband/hw/Makefile @@ -1,8 +1,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ -obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ -obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ diff --git a/kernel/drivers/infiniband/hw/amso1100/Kbuild b/kernel/drivers/infiniband/hw/amso1100/Kbuild deleted file mode 100644 index 950dfabcd..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG - -obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o - -iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \ - c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o diff --git a/kernel/drivers/infiniband/hw/amso1100/Kconfig b/kernel/drivers/infiniband/hw/amso1100/Kconfig deleted file mode 100644 index e6ce5f209..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/Kconfig +++ /dev/null @@ -1,15 +0,0 @@ -config INFINIBAND_AMSO1100 - tristate "Ammasso 1100 HCA support" - depends on PCI && INET - ---help--- - This is a low-level driver for the Ammasso 1100 host - channel adapter (HCA). - -config INFINIBAND_AMSO1100_DEBUG - bool "Verbose debugging output" - depends on INFINIBAND_AMSO1100 - default n - ---help--- - This option causes the amso1100 driver to produce a bunch of - debug messages. Select this if you are developing the driver - or trying to diagnose a problem. diff --git a/kernel/drivers/infiniband/hw/amso1100/c2.c b/kernel/drivers/infiniband/hw/amso1100/c2.c deleted file mode 100644 index 766a71cce..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2.c +++ /dev/null @@ -1,1241 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/pci.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/inetdevice.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/if_vlan.h> -#include <linux/crc32.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/init.h> -#include <linux/dma-mapping.h> -#include <linux/slab.h> -#include <linux/prefetch.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> - -#include <rdma/ib_smi.h> -#include "c2.h" -#include "c2_provider.h" - -MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); -MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver"); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); - -static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK - | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; - -static int debug = -1; /* defaults above */ -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); - -static int c2_up(struct net_device *netdev); -static int c2_down(struct net_device *netdev); -static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -static void c2_tx_interrupt(struct net_device *netdev); -static void c2_rx_interrupt(struct net_device *netdev); -static irqreturn_t c2_interrupt(int irq, void *dev_id); -static void c2_tx_timeout(struct net_device *netdev); -static int c2_change_mtu(struct net_device *netdev, int new_mtu); -static void c2_reset(struct c2_port *c2_port); - -static struct pci_device_id c2_pci_table[] = { - { PCI_DEVICE(0x18b8, 0xb001) }, - { 0 } -}; - -MODULE_DEVICE_TABLE(pci, c2_pci_table); - -static void c2_print_macaddr(struct net_device *netdev) -{ - pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq); -} - -static void c2_set_rxbufsize(struct c2_port *c2_port) -{ - struct net_device *netdev = c2_port->netdev; - - if (netdev->mtu > RX_BUF_SIZE) - c2_port->rx_buf_size = - netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) + - NET_IP_ALIGN; - else - c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE; -} - -/* - * Allocate TX ring elements and chain them together. - * One-to-one association of adapter descriptors with ring elements. - */ -static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr, - dma_addr_t base, void __iomem * mmio_txp_ring) -{ - struct c2_tx_desc *tx_desc; - struct c2_txp_desc __iomem *txp_desc; - struct c2_element *elem; - int i; - - tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL); - if (!tx_ring->start) - return -ENOMEM; - - elem = tx_ring->start; - tx_desc = vaddr; - txp_desc = mmio_txp_ring; - for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) { - tx_desc->len = 0; - tx_desc->status = 0; - - /* Set TXP_HTXD_UNINIT */ - __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL), - (void __iomem *) txp_desc + C2_TXP_ADDR); - __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN); - __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT), - (void __iomem *) txp_desc + C2_TXP_FLAGS); - - elem->skb = NULL; - elem->ht_desc = tx_desc; - elem->hw_desc = txp_desc; - - if (i == tx_ring->count - 1) { - elem->next = tx_ring->start; - tx_desc->next_offset = base; - } else { - elem->next = elem + 1; - tx_desc->next_offset = - base + (i + 1) * sizeof(*tx_desc); - } - } - - tx_ring->to_use = tx_ring->to_clean = tx_ring->start; - - return 0; -} - -/* - * Allocate RX ring elements and chain them together. - * One-to-one association of adapter descriptors with ring elements. - */ -static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr, - dma_addr_t base, void __iomem * mmio_rxp_ring) -{ - struct c2_rx_desc *rx_desc; - struct c2_rxp_desc __iomem *rxp_desc; - struct c2_element *elem; - int i; - - rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL); - if (!rx_ring->start) - return -ENOMEM; - - elem = rx_ring->start; - rx_desc = vaddr; - rxp_desc = mmio_rxp_ring; - for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) { - rx_desc->len = 0; - rx_desc->status = 0; - - /* Set RXP_HRXD_UNINIT */ - __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK), - (void __iomem *) rxp_desc + C2_RXP_STATUS); - __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT); - __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN); - __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL), - (void __iomem *) rxp_desc + C2_RXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT), - (void __iomem *) rxp_desc + C2_RXP_FLAGS); - - elem->skb = NULL; - elem->ht_desc = rx_desc; - elem->hw_desc = rxp_desc; - - if (i == rx_ring->count - 1) { - elem->next = rx_ring->start; - rx_desc->next_offset = base; - } else { - elem->next = elem + 1; - rx_desc->next_offset = - base + (i + 1) * sizeof(*rx_desc); - } - } - - rx_ring->to_use = rx_ring->to_clean = rx_ring->start; - - return 0; -} - -/* Setup buffer for receiving */ -static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem) -{ - struct c2_dev *c2dev = c2_port->c2dev; - struct c2_rx_desc *rx_desc = elem->ht_desc; - struct sk_buff *skb; - dma_addr_t mapaddr; - u32 maplen; - struct c2_rxp_hdr *rxp_hdr; - - skb = dev_alloc_skb(c2_port->rx_buf_size); - if (unlikely(!skb)) { - pr_debug("%s: out of memory for receive\n", - c2_port->netdev->name); - return -ENOMEM; - } - - /* Zero out the rxp hdr in the sk_buff */ - memset(skb->data, 0, sizeof(*rxp_hdr)); - - skb->dev = c2_port->netdev; - - maplen = c2_port->rx_buf_size; - mapaddr = - pci_map_single(c2dev->pcidev, skb->data, maplen, - PCI_DMA_FROMDEVICE); - - /* Set the sk_buff RXP_header to RXP_HRXD_READY */ - rxp_hdr = (struct c2_rxp_hdr *) skb->data; - rxp_hdr->flags = RXP_HRXD_READY; - - __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); - __raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)), - elem->hw_desc + C2_RXP_LEN); - __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY), - elem->hw_desc + C2_RXP_FLAGS); - - elem->skb = skb; - elem->mapaddr = mapaddr; - elem->maplen = maplen; - rx_desc->len = maplen; - - return 0; -} - -/* - * Allocate buffers for the Rx ring - * For receive: rx_ring.to_clean is next received frame - */ -static int c2_rx_fill(struct c2_port *c2_port) -{ - struct c2_ring *rx_ring = &c2_port->rx_ring; - struct c2_element *elem; - int ret = 0; - - elem = rx_ring->start; - do { - if (c2_rx_alloc(c2_port, elem)) { - ret = 1; - break; - } - } while ((elem = elem->next) != rx_ring->start); - - rx_ring->to_clean = rx_ring->start; - return ret; -} - -/* Free all buffers in RX ring, assumes receiver stopped */ -static void c2_rx_clean(struct c2_port *c2_port) -{ - struct c2_dev *c2dev = c2_port->c2dev; - struct c2_ring *rx_ring = &c2_port->rx_ring; - struct c2_element *elem; - struct c2_rx_desc *rx_desc; - - elem = rx_ring->start; - do { - rx_desc = elem->ht_desc; - rx_desc->len = 0; - - __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); - __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); - __raw_writew(0, elem->hw_desc + C2_RXP_LEN); - __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL), - elem->hw_desc + C2_RXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT), - elem->hw_desc + C2_RXP_FLAGS); - - if (elem->skb) { - pci_unmap_single(c2dev->pcidev, elem->mapaddr, - elem->maplen, PCI_DMA_FROMDEVICE); - dev_kfree_skb(elem->skb); - elem->skb = NULL; - } - } while ((elem = elem->next) != rx_ring->start); -} - -static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem) -{ - struct c2_tx_desc *tx_desc = elem->ht_desc; - - tx_desc->len = 0; - - pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen, - PCI_DMA_TODEVICE); - - if (elem->skb) { - dev_kfree_skb_any(elem->skb); - elem->skb = NULL; - } - - return 0; -} - -/* Free all buffers in TX ring, assumes transmitter stopped */ -static void c2_tx_clean(struct c2_port *c2_port) -{ - struct c2_ring *tx_ring = &c2_port->tx_ring; - struct c2_element *elem; - struct c2_txp_desc txp_htxd; - int retry; - unsigned long flags; - - spin_lock_irqsave(&c2_port->tx_lock, flags); - - elem = tx_ring->start; - - do { - retry = 0; - do { - txp_htxd.flags = - readw(elem->hw_desc + C2_TXP_FLAGS); - - if (txp_htxd.flags == TXP_HTXD_READY) { - retry = 1; - __raw_writew(0, - elem->hw_desc + C2_TXP_LEN); - __raw_writeq(0, - elem->hw_desc + C2_TXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE), - elem->hw_desc + C2_TXP_FLAGS); - c2_port->netdev->stats.tx_dropped++; - break; - } else { - __raw_writew(0, - elem->hw_desc + C2_TXP_LEN); - __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL), - elem->hw_desc + C2_TXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT), - elem->hw_desc + C2_TXP_FLAGS); - } - - c2_tx_free(c2_port->c2dev, elem); - - } while ((elem = elem->next) != tx_ring->start); - } while (retry); - - c2_port->tx_avail = c2_port->tx_ring.count - 1; - c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start; - - if (c2_port->tx_avail > MAX_SKB_FRAGS + 1) - netif_wake_queue(c2_port->netdev); - - spin_unlock_irqrestore(&c2_port->tx_lock, flags); -} - -/* - * Process transmit descriptors marked 'DONE' by the firmware, - * freeing up their unneeded sk_buffs. - */ -static void c2_tx_interrupt(struct net_device *netdev) -{ - struct c2_port *c2_port = netdev_priv(netdev); - struct c2_dev *c2dev = c2_port->c2dev; - struct c2_ring *tx_ring = &c2_port->tx_ring; - struct c2_element *elem; - struct c2_txp_desc txp_htxd; - - spin_lock(&c2_port->tx_lock); - - for (elem = tx_ring->to_clean; elem != tx_ring->to_use; - elem = elem->next) { - txp_htxd.flags = - be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS)); - - if (txp_htxd.flags != TXP_HTXD_DONE) - break; - - if (netif_msg_tx_done(c2_port)) { - /* PCI reads are expensive in fast path */ - txp_htxd.len = - be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN)); - pr_debug("%s: tx done slot %3Zu status 0x%x len " - "%5u bytes\n", - netdev->name, elem - tx_ring->start, - txp_htxd.flags, txp_htxd.len); - } - - c2_tx_free(c2dev, elem); - ++(c2_port->tx_avail); - } - - tx_ring->to_clean = elem; - - if (netif_queue_stopped(netdev) - && c2_port->tx_avail > MAX_SKB_FRAGS + 1) - netif_wake_queue(netdev); - - spin_unlock(&c2_port->tx_lock); -} - -static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) -{ - struct c2_rx_desc *rx_desc = elem->ht_desc; - struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; - - if (rxp_hdr->status != RXP_HRXD_OK || - rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) { - pr_debug("BAD RXP_HRXD\n"); - pr_debug(" rx_desc : %p\n", rx_desc); - pr_debug(" index : %Zu\n", - elem - c2_port->rx_ring.start); - pr_debug(" len : %u\n", rx_desc->len); - pr_debug(" rxp_hdr : %p [PA %p]\n", rxp_hdr, - (void *) __pa((unsigned long) rxp_hdr)); - pr_debug(" flags : 0x%x\n", rxp_hdr->flags); - pr_debug(" status: 0x%x\n", rxp_hdr->status); - pr_debug(" len : %u\n", rxp_hdr->len); - pr_debug(" rsvd : 0x%x\n", rxp_hdr->rsvd); - } - - /* Setup the skb for reuse since we're dropping this pkt */ - elem->skb->data = elem->skb->head; - skb_reset_tail_pointer(elem->skb); - - /* Zero out the rxp hdr in the sk_buff */ - memset(elem->skb->data, 0, sizeof(*rxp_hdr)); - - /* Write the descriptor to the adapter's rx ring */ - __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); - __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); - __raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)), - elem->hw_desc + C2_RXP_LEN); - __raw_writeq((__force u64) cpu_to_be64(elem->mapaddr), - elem->hw_desc + C2_RXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY), - elem->hw_desc + C2_RXP_FLAGS); - - pr_debug("packet dropped\n"); - c2_port->netdev->stats.rx_dropped++; -} - -static void c2_rx_interrupt(struct net_device *netdev) -{ - struct c2_port *c2_port = netdev_priv(netdev); - struct c2_dev *c2dev = c2_port->c2dev; - struct c2_ring *rx_ring = &c2_port->rx_ring; - struct c2_element *elem; - struct c2_rx_desc *rx_desc; - struct c2_rxp_hdr *rxp_hdr; - struct sk_buff *skb; - dma_addr_t mapaddr; - u32 maplen, buflen; - unsigned long flags; - - spin_lock_irqsave(&c2dev->lock, flags); - - /* Begin where we left off */ - rx_ring->to_clean = rx_ring->start + c2dev->cur_rx; - - for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean; - elem = elem->next) { - rx_desc = elem->ht_desc; - mapaddr = elem->mapaddr; - maplen = elem->maplen; - skb = elem->skb; - rxp_hdr = (struct c2_rxp_hdr *) skb->data; - - if (rxp_hdr->flags != RXP_HRXD_DONE) - break; - buflen = rxp_hdr->len; - - /* Sanity check the RXP header */ - if (rxp_hdr->status != RXP_HRXD_OK || - buflen > (rx_desc->len - sizeof(*rxp_hdr))) { - c2_rx_error(c2_port, elem); - continue; - } - - /* - * Allocate and map a new skb for replenishing the host - * RX desc - */ - if (c2_rx_alloc(c2_port, elem)) { - c2_rx_error(c2_port, elem); - continue; - } - - /* Unmap the old skb */ - pci_unmap_single(c2dev->pcidev, mapaddr, maplen, - PCI_DMA_FROMDEVICE); - - prefetch(skb->data); - - /* - * Skip past the leading 8 bytes comprising of the - * "struct c2_rxp_hdr", prepended by the adapter - * to the usual Ethernet header ("struct ethhdr"), - * to the start of the raw Ethernet packet. - * - * Fix up the various fields in the sk_buff before - * passing it up to netif_rx(). The transfer size - * (in bytes) specified by the adapter len field of - * the "struct rxp_hdr_t" does NOT include the - * "sizeof(struct c2_rxp_hdr)". - */ - skb->data += sizeof(*rxp_hdr); - skb_set_tail_pointer(skb, buflen); - skb->len = buflen; - skb->protocol = eth_type_trans(skb, netdev); - - netif_rx(skb); - - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += buflen; - } - - /* Save where we left off */ - rx_ring->to_clean = elem; - c2dev->cur_rx = elem - rx_ring->start; - C2_SET_CUR_RX(c2dev, c2dev->cur_rx); - - spin_unlock_irqrestore(&c2dev->lock, flags); -} - -/* - * Handle netisr0 TX & RX interrupts. - */ -static irqreturn_t c2_interrupt(int irq, void *dev_id) -{ - unsigned int netisr0, dmaisr; - int handled = 0; - struct c2_dev *c2dev = (struct c2_dev *) dev_id; - - /* Process CCILNET interrupts */ - netisr0 = readl(c2dev->regs + C2_NISR0); - if (netisr0) { - - /* - * There is an issue with the firmware that always - * provides the status of RX for both TX & RX - * interrupts. So process both queues here. - */ - c2_rx_interrupt(c2dev->netdev); - c2_tx_interrupt(c2dev->netdev); - - /* Clear the interrupt */ - writel(netisr0, c2dev->regs + C2_NISR0); - handled++; - } - - /* Process RNIC interrupts */ - dmaisr = readl(c2dev->regs + C2_DISR); - if (dmaisr) { - writel(dmaisr, c2dev->regs + C2_DISR); - c2_rnic_interrupt(c2dev); - handled++; - } - - if (handled) { - return IRQ_HANDLED; - } else { - return IRQ_NONE; - } -} - -static int c2_up(struct net_device *netdev) -{ - struct c2_port *c2_port = netdev_priv(netdev); - struct c2_dev *c2dev = c2_port->c2dev; - struct c2_element *elem; - struct c2_rxp_hdr *rxp_hdr; - struct in_device *in_dev; - size_t rx_size, tx_size; - int ret, i; - unsigned int netimr0; - - if (netif_msg_ifup(c2_port)) - pr_debug("%s: enabling interface\n", netdev->name); - - /* Set the Rx buffer size based on MTU */ - c2_set_rxbufsize(c2_port); - - /* Allocate DMA'able memory for Tx/Rx host descriptor rings */ - rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc); - tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc); - - c2_port->mem_size = tx_size + rx_size; - c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size, - &c2_port->dma); - if (c2_port->mem == NULL) { - pr_debug("Unable to allocate memory for " - "host descriptor rings\n"); - return -ENOMEM; - } - - /* Create the Rx host descriptor ring */ - if ((ret = - c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma, - c2dev->mmio_rxp_ring))) { - pr_debug("Unable to create RX ring\n"); - goto bail0; - } - - /* Allocate Rx buffers for the host descriptor ring */ - if (c2_rx_fill(c2_port)) { - pr_debug("Unable to fill RX ring\n"); - goto bail1; - } - - /* Create the Tx host descriptor ring */ - if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size, - c2_port->dma + rx_size, - c2dev->mmio_txp_ring))) { - pr_debug("Unable to create TX ring\n"); - goto bail1; - } - - /* Set the TX pointer to where we left off */ - c2_port->tx_avail = c2_port->tx_ring.count - 1; - c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean = - c2_port->tx_ring.start + c2dev->cur_tx; - - /* missing: Initialize MAC */ - - BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean); - - /* Reset the adapter, ensures the driver is in sync with the RXP */ - c2_reset(c2_port); - - /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */ - for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count; - i++, elem++) { - rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; - rxp_hdr->flags = 0; - __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY), - elem->hw_desc + C2_RXP_FLAGS); - } - - /* Enable network packets */ - netif_start_queue(netdev); - - /* Enable IRQ */ - writel(0, c2dev->regs + C2_IDIS); - netimr0 = readl(c2dev->regs + C2_NIMR0); - netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT); - writel(netimr0, c2dev->regs + C2_NIMR0); - - /* Tell the stack to ignore arp requests for ipaddrs bound to - * other interfaces. This is needed to prevent the host stack - * from responding to arp requests to the ipaddr bound on the - * rdma interface. - */ - in_dev = in_dev_get(netdev); - IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1); - in_dev_put(in_dev); - - return 0; - - bail1: - c2_rx_clean(c2_port); - kfree(c2_port->rx_ring.start); - - bail0: - pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, - c2_port->dma); - - return ret; -} - -static int c2_down(struct net_device *netdev) -{ - struct c2_port *c2_port = netdev_priv(netdev); - struct c2_dev *c2dev = c2_port->c2dev; - - if (netif_msg_ifdown(c2_port)) - pr_debug("%s: disabling interface\n", - netdev->name); - - /* Wait for all the queued packets to get sent */ - c2_tx_interrupt(netdev); - - /* Disable network packets */ - netif_stop_queue(netdev); - - /* Disable IRQs by clearing the interrupt mask */ - writel(1, c2dev->regs + C2_IDIS); - writel(0, c2dev->regs + C2_NIMR0); - - /* missing: Stop transmitter */ - - /* missing: Stop receiver */ - - /* Reset the adapter, ensures the driver is in sync with the RXP */ - c2_reset(c2_port); - - /* missing: Turn off LEDs here */ - - /* Free all buffers in the host descriptor rings */ - c2_tx_clean(c2_port); - c2_rx_clean(c2_port); - - /* Free the host descriptor rings */ - kfree(c2_port->rx_ring.start); - kfree(c2_port->tx_ring.start); - pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, - c2_port->dma); - - return 0; -} - -static void c2_reset(struct c2_port *c2_port) -{ - struct c2_dev *c2dev = c2_port->c2dev; - unsigned int cur_rx = c2dev->cur_rx; - - /* Tell the hardware to quiesce */ - C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI); - - /* - * The hardware will reset the C2_PCI_HRX_QUI bit once - * the RXP is quiesced. Wait 2 seconds for this. - */ - ssleep(2); - - cur_rx = C2_GET_CUR_RX(c2dev); - - if (cur_rx & C2_PCI_HRX_QUI) - pr_debug("c2_reset: failed to quiesce the hardware!\n"); - - cur_rx &= ~C2_PCI_HRX_QUI; - - c2dev->cur_rx = cur_rx; - - pr_debug("Current RX: %u\n", c2dev->cur_rx); -} - -static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) -{ - struct c2_port *c2_port = netdev_priv(netdev); - struct c2_dev *c2dev = c2_port->c2dev; - struct c2_ring *tx_ring = &c2_port->tx_ring; - struct c2_element *elem; - dma_addr_t mapaddr; - u32 maplen; - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&c2_port->tx_lock, flags); - - if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) { - netif_stop_queue(netdev); - spin_unlock_irqrestore(&c2_port->tx_lock, flags); - - pr_debug("%s: Tx ring full when queue awake!\n", - netdev->name); - return NETDEV_TX_BUSY; - } - - maplen = skb_headlen(skb); - mapaddr = - pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE); - - elem = tx_ring->to_use; - elem->skb = skb; - elem->mapaddr = mapaddr; - elem->maplen = maplen; - - /* Tell HW to xmit */ - __raw_writeq((__force u64) cpu_to_be64(mapaddr), - elem->hw_desc + C2_TXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(maplen), - elem->hw_desc + C2_TXP_LEN); - __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY), - elem->hw_desc + C2_TXP_FLAGS); - - netdev->stats.tx_packets++; - netdev->stats.tx_bytes += maplen; - - /* Loop thru additional data fragments and queue them */ - if (skb_shinfo(skb)->nr_frags) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - maplen = skb_frag_size(frag); - mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag, - 0, maplen, DMA_TO_DEVICE); - elem = elem->next; - elem->skb = NULL; - elem->mapaddr = mapaddr; - elem->maplen = maplen; - - /* Tell HW to xmit */ - __raw_writeq((__force u64) cpu_to_be64(mapaddr), - elem->hw_desc + C2_TXP_ADDR); - __raw_writew((__force u16) cpu_to_be16(maplen), - elem->hw_desc + C2_TXP_LEN); - __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY), - elem->hw_desc + C2_TXP_FLAGS); - - netdev->stats.tx_packets++; - netdev->stats.tx_bytes += maplen; - } - } - - tx_ring->to_use = elem->next; - c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1); - - if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) { - netif_stop_queue(netdev); - if (netif_msg_tx_queued(c2_port)) - pr_debug("%s: transmit queue full\n", - netdev->name); - } - - spin_unlock_irqrestore(&c2_port->tx_lock, flags); - - netdev->trans_start = jiffies; - - return NETDEV_TX_OK; -} - -static void c2_tx_timeout(struct net_device *netdev) -{ - struct c2_port *c2_port = netdev_priv(netdev); - - if (netif_msg_timer(c2_port)) - pr_debug("%s: tx timeout\n", netdev->name); - - c2_tx_clean(c2_port); -} - -static int c2_change_mtu(struct net_device *netdev, int new_mtu) -{ - int ret = 0; - - if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) - return -EINVAL; - - netdev->mtu = new_mtu; - - if (netif_running(netdev)) { - c2_down(netdev); - - c2_up(netdev); - } - - return ret; -} - -static const struct net_device_ops c2_netdev = { - .ndo_open = c2_up, - .ndo_stop = c2_down, - .ndo_start_xmit = c2_xmit_frame, - .ndo_tx_timeout = c2_tx_timeout, - .ndo_change_mtu = c2_change_mtu, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -/* Initialize network device */ -static struct net_device *c2_devinit(struct c2_dev *c2dev, - void __iomem * mmio_addr) -{ - struct c2_port *c2_port = NULL; - struct net_device *netdev = alloc_etherdev(sizeof(*c2_port)); - - if (!netdev) { - pr_debug("c2_port etherdev alloc failed"); - return NULL; - } - - SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); - - netdev->netdev_ops = &c2_netdev; - netdev->watchdog_timeo = C2_TX_TIMEOUT; - netdev->irq = c2dev->pcidev->irq; - - c2_port = netdev_priv(netdev); - c2_port->netdev = netdev; - c2_port->c2dev = c2dev; - c2_port->msg_enable = netif_msg_init(debug, default_msg); - c2_port->tx_ring.count = C2_NUM_TX_DESC; - c2_port->rx_ring.count = C2_NUM_RX_DESC; - - spin_lock_init(&c2_port->tx_lock); - - /* Copy our 48-bit ethernet hardware address */ - memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6); - - /* Validate the MAC address */ - if (!is_valid_ether_addr(netdev->dev_addr)) { - pr_debug("Invalid MAC Address\n"); - c2_print_macaddr(netdev); - free_netdev(netdev); - return NULL; - } - - c2dev->netdev = netdev; - - return netdev; -} - -static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) -{ - int ret = 0, i; - unsigned long reg0_start, reg0_flags, reg0_len; - unsigned long reg2_start, reg2_flags, reg2_len; - unsigned long reg4_start, reg4_flags, reg4_len; - unsigned kva_map_size; - struct net_device *netdev = NULL; - struct c2_dev *c2dev = NULL; - void __iomem *mmio_regs = NULL; - - printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n", - DRV_VERSION); - - /* Enable PCI device */ - ret = pci_enable_device(pcidev); - if (ret) { - printk(KERN_ERR PFX "%s: Unable to enable PCI device\n", - pci_name(pcidev)); - goto bail0; - } - - reg0_start = pci_resource_start(pcidev, BAR_0); - reg0_len = pci_resource_len(pcidev, BAR_0); - reg0_flags = pci_resource_flags(pcidev, BAR_0); - - reg2_start = pci_resource_start(pcidev, BAR_2); - reg2_len = pci_resource_len(pcidev, BAR_2); - reg2_flags = pci_resource_flags(pcidev, BAR_2); - - reg4_start = pci_resource_start(pcidev, BAR_4); - reg4_len = pci_resource_len(pcidev, BAR_4); - reg4_flags = pci_resource_flags(pcidev, BAR_4); - - pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len); - pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len); - pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len); - - /* Make sure PCI base addr are MMIO */ - if (!(reg0_flags & IORESOURCE_MEM) || - !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) { - printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); - ret = -ENODEV; - goto bail1; - } - - /* Check for weird/broken PCI region reporting */ - if ((reg0_len < C2_REG0_SIZE) || - (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) { - printk(KERN_ERR PFX "Invalid PCI region sizes\n"); - ret = -ENODEV; - goto bail1; - } - - /* Reserve PCI I/O and memory resources */ - ret = pci_request_regions(pcidev, DRV_NAME); - if (ret) { - printk(KERN_ERR PFX "%s: Unable to request regions\n", - pci_name(pcidev)); - goto bail1; - } - - if ((sizeof(dma_addr_t) > 4)) { - ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64)); - if (ret < 0) { - printk(KERN_ERR PFX "64b DMA configuration failed\n"); - goto bail2; - } - } else { - ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); - if (ret < 0) { - printk(KERN_ERR PFX "32b DMA configuration failed\n"); - goto bail2; - } - } - - /* Enables bus-mastering on the device */ - pci_set_master(pcidev); - - /* Remap the adapter PCI registers in BAR4 */ - mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, - sizeof(struct c2_adapter_pci_regs)); - if (!mmio_regs) { - printk(KERN_ERR PFX - "Unable to remap adapter PCI registers in BAR4\n"); - ret = -EIO; - goto bail2; - } - - /* Validate PCI regs magic */ - for (i = 0; i < sizeof(c2_magic); i++) { - if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) { - printk(KERN_ERR PFX "Downlevel Firmware boot loader " - "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash " - "utility to update your boot loader\n", - i + 1, sizeof(c2_magic), - readb(mmio_regs + C2_REGS_MAGIC + i), - c2_magic[i]); - printk(KERN_ERR PFX "Adapter not claimed\n"); - iounmap(mmio_regs); - ret = -EIO; - goto bail2; - } - } - - /* Validate the adapter version */ - if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) { - printk(KERN_ERR PFX "Version mismatch " - "[fw=%u, c2=%u], Adapter not claimed\n", - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)), - C2_VERSION); - ret = -EINVAL; - iounmap(mmio_regs); - goto bail2; - } - - /* Validate the adapter IVN */ - if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) { - printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using " - "the OpenIB device support kit. " - "[fw=0x%x, c2=0x%x], Adapter not claimed\n", - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)), - C2_IVN); - ret = -EINVAL; - iounmap(mmio_regs); - goto bail2; - } - - /* Allocate hardware structure */ - c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev)); - if (!c2dev) { - printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", - pci_name(pcidev)); - ret = -ENOMEM; - iounmap(mmio_regs); - goto bail2; - } - - memset(c2dev, 0, sizeof(*c2dev)); - spin_lock_init(&c2dev->lock); - c2dev->pcidev = pcidev; - c2dev->cur_tx = 0; - - /* Get the last RX index */ - c2dev->cur_rx = - (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) - - 0xffffc000) / sizeof(struct c2_rxp_desc); - - /* Request an interrupt line for the driver */ - ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev); - if (ret) { - printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", - pci_name(pcidev), pcidev->irq); - iounmap(mmio_regs); - goto bail3; - } - - /* Set driver specific data */ - pci_set_drvdata(pcidev, c2dev); - - /* Initialize network device */ - if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) { - ret = -ENOMEM; - iounmap(mmio_regs); - goto bail4; - } - - /* Save off the actual size prior to unmapping mmio_regs */ - kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE)); - - /* Unmap the adapter PCI registers in BAR4 */ - iounmap(mmio_regs); - - /* Register network device */ - ret = register_netdev(netdev); - if (ret) { - printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", - ret); - goto bail5; - } - - /* Disable network packets */ - netif_stop_queue(netdev); - - /* Remap the adapter HRXDQ PA space to kernel VA space */ - c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET, - C2_RXP_HRXDQ_SIZE); - if (!c2dev->mmio_rxp_ring) { - printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n"); - ret = -EIO; - goto bail6; - } - - /* Remap the adapter HTXDQ PA space to kernel VA space */ - c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET, - C2_TXP_HTXDQ_SIZE); - if (!c2dev->mmio_txp_ring) { - printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n"); - ret = -EIO; - goto bail7; - } - - /* Save off the current RX index in the last 4 bytes of the TXP Ring */ - C2_SET_CUR_RX(c2dev, c2dev->cur_rx); - - /* Remap the PCI registers in adapter BAR0 to kernel VA space */ - c2dev->regs = ioremap_nocache(reg0_start, reg0_len); - if (!c2dev->regs) { - printk(KERN_ERR PFX "Unable to remap BAR0\n"); - ret = -EIO; - goto bail8; - } - - /* Remap the PCI registers in adapter BAR4 to kernel VA space */ - c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET; - c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, - kva_map_size); - if (!c2dev->kva) { - printk(KERN_ERR PFX "Unable to remap BAR4\n"); - ret = -EIO; - goto bail9; - } - - /* Print out the MAC address */ - c2_print_macaddr(netdev); - - ret = c2_rnic_init(c2dev); - if (ret) { - printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret); - goto bail10; - } - - ret = c2_register_device(c2dev); - if (ret) - goto bail10; - - return 0; - - bail10: - iounmap(c2dev->kva); - - bail9: - iounmap(c2dev->regs); - - bail8: - iounmap(c2dev->mmio_txp_ring); - - bail7: - iounmap(c2dev->mmio_rxp_ring); - - bail6: - unregister_netdev(netdev); - - bail5: - free_netdev(netdev); - - bail4: - free_irq(pcidev->irq, c2dev); - - bail3: - ib_dealloc_device(&c2dev->ibdev); - - bail2: - pci_release_regions(pcidev); - - bail1: - pci_disable_device(pcidev); - - bail0: - return ret; -} - -static void c2_remove(struct pci_dev *pcidev) -{ - struct c2_dev *c2dev = pci_get_drvdata(pcidev); - struct net_device *netdev = c2dev->netdev; - - /* Unregister with OpenIB */ - c2_unregister_device(c2dev); - - /* Clean up the RNIC resources */ - c2_rnic_term(c2dev); - - /* Remove network device from the kernel */ - unregister_netdev(netdev); - - /* Free network device */ - free_netdev(netdev); - - /* Free the interrupt line */ - free_irq(pcidev->irq, c2dev); - - /* missing: Turn LEDs off here */ - - /* Unmap adapter PA space */ - iounmap(c2dev->kva); - iounmap(c2dev->regs); - iounmap(c2dev->mmio_txp_ring); - iounmap(c2dev->mmio_rxp_ring); - - /* Free the hardware structure */ - ib_dealloc_device(&c2dev->ibdev); - - /* Release reserved PCI I/O and memory resources */ - pci_release_regions(pcidev); - - /* Disable PCI device */ - pci_disable_device(pcidev); - - /* Clear driver specific data */ - pci_set_drvdata(pcidev, NULL); -} - -static struct pci_driver c2_pci_driver = { - .name = DRV_NAME, - .id_table = c2_pci_table, - .probe = c2_probe, - .remove = c2_remove, -}; - -module_pci_driver(c2_pci_driver); diff --git a/kernel/drivers/infiniband/hw/amso1100/c2.h b/kernel/drivers/infiniband/hw/amso1100/c2.h deleted file mode 100644 index d619d7358..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2.h +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __C2_H -#define __C2_H - -#include <linux/netdevice.h> -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/idr.h> - -#include "c2_provider.h" -#include "c2_mq.h" -#include "c2_status.h" - -#define DRV_NAME "c2" -#define DRV_VERSION "1.1" -#define PFX DRV_NAME ": " - -#define BAR_0 0 -#define BAR_2 2 -#define BAR_4 4 - -#define RX_BUF_SIZE (1536 + 8) -#define ETH_JUMBO_MTU 9000 -#define C2_MAGIC "CEPHEUS" -#define C2_VERSION 4 -#define C2_IVN (18 & 0x7fffffff) - -#define C2_REG0_SIZE (16 * 1024) -#define C2_REG2_SIZE (2 * 1024 * 1024) -#define C2_REG4_SIZE (256 * 1024 * 1024) -#define C2_NUM_TX_DESC 341 -#define C2_NUM_RX_DESC 256 -#define C2_PCI_REGS_OFFSET (0x10000) -#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2)) -#define C2_RXP_HRXDQ_SIZE (4096) -#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE) -#define C2_TXP_HTXDQ_SIZE (4096) -#define C2_TX_TIMEOUT (6*HZ) - -/* CEPHEUS */ -static const u8 c2_magic[] = { - 0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53 -}; - -enum adapter_pci_regs { - C2_REGS_MAGIC = 0x0000, - C2_REGS_VERS = 0x0008, - C2_REGS_IVN = 0x000C, - C2_REGS_PCI_WINSIZE = 0x0010, - C2_REGS_Q0_QSIZE = 0x0014, - C2_REGS_Q0_MSGSIZE = 0x0018, - C2_REGS_Q0_POOLSTART = 0x001C, - C2_REGS_Q0_SHARED = 0x0020, - C2_REGS_Q1_QSIZE = 0x0024, - C2_REGS_Q1_MSGSIZE = 0x0028, - C2_REGS_Q1_SHARED = 0x0030, - C2_REGS_Q2_QSIZE = 0x0034, - C2_REGS_Q2_MSGSIZE = 0x0038, - C2_REGS_Q2_SHARED = 0x0040, - C2_REGS_ENADDR = 0x004C, - C2_REGS_RDMA_ENADDR = 0x0054, - C2_REGS_HRX_CUR = 0x006C, -}; - -struct c2_adapter_pci_regs { - char reg_magic[8]; - u32 version; - u32 ivn; - u32 pci_window_size; - u32 q0_q_size; - u32 q0_msg_size; - u32 q0_pool_start; - u32 q0_shared; - u32 q1_q_size; - u32 q1_msg_size; - u32 q1_pool_start; - u32 q1_shared; - u32 q2_q_size; - u32 q2_msg_size; - u32 q2_pool_start; - u32 q2_shared; - u32 log_start; - u32 log_size; - u8 host_enaddr[8]; - u8 rdma_enaddr[8]; - u32 crash_entry; - u32 crash_ready[2]; - u32 fw_txd_cur; - u32 fw_hrxd_cur; - u32 fw_rxd_cur; -}; - -enum pci_regs { - C2_HISR = 0x0000, - C2_DISR = 0x0004, - C2_HIMR = 0x0008, - C2_DIMR = 0x000C, - C2_NISR0 = 0x0010, - C2_NISR1 = 0x0014, - C2_NIMR0 = 0x0018, - C2_NIMR1 = 0x001C, - C2_IDIS = 0x0020, -}; - -enum { - C2_PCI_HRX_INT = 1 << 8, - C2_PCI_HTX_INT = 1 << 17, - C2_PCI_HRX_QUI = 1 << 31, -}; - -/* - * Cepheus registers in BAR0. - */ -struct c2_pci_regs { - u32 hostisr; - u32 dmaisr; - u32 hostimr; - u32 dmaimr; - u32 netisr0; - u32 netisr1; - u32 netimr0; - u32 netimr1; - u32 int_disable; -}; - -/* TXP flags */ -enum c2_txp_flags { - TXP_HTXD_DONE = 0, - TXP_HTXD_READY = 1 << 0, - TXP_HTXD_UNINIT = 1 << 1, -}; - -/* RXP flags */ -enum c2_rxp_flags { - RXP_HRXD_UNINIT = 0, - RXP_HRXD_READY = 1 << 0, - RXP_HRXD_DONE = 1 << 1, -}; - -/* RXP status */ -enum c2_rxp_status { - RXP_HRXD_ZERO = 0, - RXP_HRXD_OK = 1 << 0, - RXP_HRXD_BUF_OV = 1 << 1, -}; - -/* TXP descriptor fields */ -enum txp_desc { - C2_TXP_FLAGS = 0x0000, - C2_TXP_LEN = 0x0002, - C2_TXP_ADDR = 0x0004, -}; - -/* RXP descriptor fields */ -enum rxp_desc { - C2_RXP_FLAGS = 0x0000, - C2_RXP_STATUS = 0x0002, - C2_RXP_COUNT = 0x0004, - C2_RXP_LEN = 0x0006, - C2_RXP_ADDR = 0x0008, -}; - -struct c2_txp_desc { - u16 flags; - u16 len; - u64 addr; -} __attribute__ ((packed)); - -struct c2_rxp_desc { - u16 flags; - u16 status; - u16 count; - u16 len; - u64 addr; -} __attribute__ ((packed)); - -struct c2_rxp_hdr { - u16 flags; - u16 status; - u16 len; - u16 rsvd; -} __attribute__ ((packed)); - -struct c2_tx_desc { - u32 len; - u32 status; - dma_addr_t next_offset; -}; - -struct c2_rx_desc { - u32 len; - u32 status; - dma_addr_t next_offset; -}; - -struct c2_alloc { - u32 last; - u32 max; - spinlock_t lock; - unsigned long *table; -}; - -struct c2_array { - struct { - void **page; - int used; - } *page_list; -}; - -/* - * The MQ shared pointer pool is organized as a linked list of - * chunks. Each chunk contains a linked list of free shared pointers - * that can be allocated to a given user mode client. - * - */ -struct sp_chunk { - struct sp_chunk *next; - dma_addr_t dma_addr; - DEFINE_DMA_UNMAP_ADDR(mapping); - u16 head; - u16 shared_ptr[0]; -}; - -struct c2_pd_table { - u32 last; - u32 max; - spinlock_t lock; - unsigned long *table; -}; - -struct c2_qp_table { - struct idr idr; - spinlock_t lock; -}; - -struct c2_element { - struct c2_element *next; - void *ht_desc; /* host descriptor */ - void __iomem *hw_desc; /* hardware descriptor */ - struct sk_buff *skb; - dma_addr_t mapaddr; - u32 maplen; -}; - -struct c2_ring { - struct c2_element *to_clean; - struct c2_element *to_use; - struct c2_element *start; - unsigned long count; -}; - -struct c2_dev { - struct ib_device ibdev; - void __iomem *regs; - void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */ - void __iomem *mmio_rxp_ring; - spinlock_t lock; - struct pci_dev *pcidev; - struct net_device *netdev; - struct net_device *pseudo_netdev; - unsigned int cur_tx; - unsigned int cur_rx; - u32 adapter_handle; - int device_cap_flags; - void __iomem *kva; /* KVA device memory */ - unsigned long pa; /* PA device memory */ - void **qptr_array; - - struct kmem_cache *host_msg_cache; - - struct list_head cca_link; /* adapter list */ - struct list_head eh_wakeup_list; /* event wakeup list */ - wait_queue_head_t req_vq_wo; - - /* Cached RNIC properties */ - struct ib_device_attr props; - - struct c2_pd_table pd_table; - struct c2_qp_table qp_table; - int ports; /* num of GigE ports */ - int devnum; - spinlock_t vqlock; /* sync vbs req MQ */ - - /* Verbs Queues */ - struct c2_mq req_vq; /* Verbs Request MQ */ - struct c2_mq rep_vq; /* Verbs Reply MQ */ - struct c2_mq aeq; /* Async Events MQ */ - - /* Kernel client MQs */ - struct sp_chunk *kern_mqsp_pool; - - /* Device updates these values when posting messages to a host - * target queue */ - u16 req_vq_shared; - u16 rep_vq_shared; - u16 aeq_shared; - u16 irq_claimed; - - /* - * Shared host target pages for user-accessible MQs. - */ - int hthead; /* index of first free entry */ - void *htpages; /* kernel vaddr */ - int htlen; /* length of htpages memory */ - void *htuva; /* user mapped vaddr */ - spinlock_t htlock; /* serialize allocation */ - - u64 adapter_hint_uva; /* access to the activity FIFO */ - - // spinlock_t aeq_lock; - // spinlock_t rnic_lock; - - __be16 *hint_count; - dma_addr_t hint_count_dma; - u16 hints_read; - - int init; /* TRUE if it's ready */ - char ae_cache_name[16]; - char vq_cache_name[16]; -}; - -struct c2_port { - u32 msg_enable; - struct c2_dev *c2dev; - struct net_device *netdev; - - spinlock_t tx_lock; - u32 tx_avail; - struct c2_ring tx_ring; - struct c2_ring rx_ring; - - void *mem; /* PCI memory for host rings */ - dma_addr_t dma; - unsigned long mem_size; - - u32 rx_buf_size; -}; - -/* - * Activity FIFO registers in BAR0. - */ -#define PCI_BAR0_HOST_HINT 0x100 -#define PCI_BAR0_ADAPTER_HINT 0x2000 - -/* - * Ammasso PCI vendor id and Cepheus PCI device id. - */ -#define CQ_ARMED 0x01 -#define CQ_WAIT_FOR_DMA 0x80 - -/* - * The format of a hint is as follows: - * Lower 16 bits are the count of hints for the queue. - * Next 15 bits are the qp_index - * Upper most bit depends on who reads it: - * If read by producer, then it means Full (1) or Not-Full (0) - * If read by consumer, then it means Empty (1) or Not-Empty (0) - */ -#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) -#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) -#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) - - -/* - * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t - * struct. - */ -#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000 - -#ifndef readq -static inline u64 readq(const void __iomem * addr) -{ - u64 ret = readl(addr + 4); - ret <<= 32; - ret |= readl(addr); - - return ret; -} -#endif - -#ifndef writeq -static inline void __raw_writeq(u64 val, void __iomem * addr) -{ - __raw_writel((u32) (val), addr); - __raw_writel((u32) (val >> 32), (addr + 4)); -} -#endif - -#define C2_SET_CUR_RX(c2dev, cur_rx) \ - __raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092) - -#define C2_GET_CUR_RX(c2dev) \ - be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092)) - -static inline struct c2_dev *to_c2dev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct c2_dev, ibdev); -} - -static inline int c2_errno(void *reply) -{ - switch (c2_wr_get_result(reply)) { - case C2_OK: - return 0; - case CCERR_NO_BUFS: - case CCERR_INSUFFICIENT_RESOURCES: - case CCERR_ZERO_RDMA_READ_RESOURCES: - return -ENOMEM; - case CCERR_MR_IN_USE: - case CCERR_QP_IN_USE: - return -EBUSY; - case CCERR_ADDR_IN_USE: - return -EADDRINUSE; - case CCERR_ADDR_NOT_AVAIL: - return -EADDRNOTAVAIL; - case CCERR_CONN_RESET: - return -ECONNRESET; - case CCERR_NOT_IMPLEMENTED: - case CCERR_INVALID_WQE: - return -ENOSYS; - case CCERR_QP_NOT_PRIVILEGED: - return -EPERM; - case CCERR_STACK_ERROR: - return -EPROTO; - case CCERR_ACCESS_VIOLATION: - case CCERR_BASE_AND_BOUNDS_VIOLATION: - return -EFAULT; - case CCERR_STAG_STATE_NOT_INVALID: - case CCERR_INVALID_ADDRESS: - case CCERR_INVALID_CQ: - case CCERR_INVALID_EP: - case CCERR_INVALID_MODIFIER: - case CCERR_INVALID_MTU: - case CCERR_INVALID_PD_ID: - case CCERR_INVALID_QP: - case CCERR_INVALID_RNIC: - case CCERR_INVALID_STAG: - return -EINVAL; - default: - return -EAGAIN; - } -} - -/* Device */ -extern int c2_register_device(struct c2_dev *c2dev); -extern void c2_unregister_device(struct c2_dev *c2dev); -extern int c2_rnic_init(struct c2_dev *c2dev); -extern void c2_rnic_term(struct c2_dev *c2dev); -extern void c2_rnic_interrupt(struct c2_dev *c2dev); -extern int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask); -extern int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask); - -/* QPs */ -extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd, - struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp); -extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp); -extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn); -extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, - struct ib_qp_attr *attr, int attr_mask); -extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, - int ord, int ird); -extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr); -extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr); -extern void c2_init_qp_table(struct c2_dev *c2dev); -extern void c2_cleanup_qp_table(struct c2_dev *c2dev); -extern void c2_set_qp_state(struct c2_qp *, int); -extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn); - -/* PDs */ -extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd); -extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd); -extern int c2_init_pd_table(struct c2_dev *c2dev); -extern void c2_cleanup_pd_table(struct c2_dev *c2dev); - -/* CQs */ -extern int c2_init_cq(struct c2_dev *c2dev, int entries, - struct c2_ucontext *ctx, struct c2_cq *cq); -extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq); -extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); -extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); -extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); - -/* CM */ -extern int c2_llp_connect(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *iw_param); -extern int c2_llp_accept(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *iw_param); -extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, - u8 pdata_len); -extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog); -extern int c2_llp_service_destroy(struct iw_cm_id *cm_id); - -/* MM */ -extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, - int page_size, int pbl_depth, u32 length, - u32 off, u64 *va, enum c2_acf acf, - struct c2_mr *mr); -extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index); - -/* AE */ -extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index); - -/* MQSP Allocator */ -extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, - struct sp_chunk **root); -extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root); -extern __be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, - dma_addr_t *dma_addr, gfp_t gfp_mask); -extern void c2_free_mqsp(__be16* mqsp); -#endif diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_ae.c b/kernel/drivers/infiniband/hw/amso1100/c2_ae.c deleted file mode 100644 index cedda2523..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_ae.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "c2.h" -#include <rdma/iw_cm.h> -#include "c2_status.h" -#include "c2_ae.h" - -static int c2_convert_cm_status(u32 c2_status) -{ - switch (c2_status) { - case C2_CONN_STATUS_SUCCESS: - return 0; - case C2_CONN_STATUS_REJECTED: - return -ENETRESET; - case C2_CONN_STATUS_REFUSED: - return -ECONNREFUSED; - case C2_CONN_STATUS_TIMEDOUT: - return -ETIMEDOUT; - case C2_CONN_STATUS_NETUNREACH: - return -ENETUNREACH; - case C2_CONN_STATUS_HOSTUNREACH: - return -EHOSTUNREACH; - case C2_CONN_STATUS_INVALID_RNIC: - return -EINVAL; - case C2_CONN_STATUS_INVALID_QP: - return -EINVAL; - case C2_CONN_STATUS_INVALID_QP_STATE: - return -EINVAL; - case C2_CONN_STATUS_ADDR_NOT_AVAIL: - return -EADDRNOTAVAIL; - default: - printk(KERN_ERR PFX - "%s - Unable to convert CM status: %d\n", - __func__, c2_status); - return -EIO; - } -} - -static const char* to_event_str(int event) -{ - static const char* event_str[] = { - "CCAE_REMOTE_SHUTDOWN", - "CCAE_ACTIVE_CONNECT_RESULTS", - "CCAE_CONNECTION_REQUEST", - "CCAE_LLP_CLOSE_COMPLETE", - "CCAE_TERMINATE_MESSAGE_RECEIVED", - "CCAE_LLP_CONNECTION_RESET", - "CCAE_LLP_CONNECTION_LOST", - "CCAE_LLP_SEGMENT_SIZE_INVALID", - "CCAE_LLP_INVALID_CRC", - "CCAE_LLP_BAD_FPDU", - "CCAE_INVALID_DDP_VERSION", - "CCAE_INVALID_RDMA_VERSION", - "CCAE_UNEXPECTED_OPCODE", - "CCAE_INVALID_DDP_QUEUE_NUMBER", - "CCAE_RDMA_READ_NOT_ENABLED", - "CCAE_RDMA_WRITE_NOT_ENABLED", - "CCAE_RDMA_READ_TOO_SMALL", - "CCAE_NO_L_BIT", - "CCAE_TAGGED_INVALID_STAG", - "CCAE_TAGGED_BASE_BOUNDS_VIOLATION", - "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION", - "CCAE_TAGGED_INVALID_PD", - "CCAE_WRAP_ERROR", - "CCAE_BAD_CLOSE", - "CCAE_BAD_LLP_CLOSE", - "CCAE_INVALID_MSN_RANGE", - "CCAE_INVALID_MSN_GAP", - "CCAE_IRRQ_OVERFLOW", - "CCAE_IRRQ_MSN_GAP", - "CCAE_IRRQ_MSN_RANGE", - "CCAE_IRRQ_INVALID_STAG", - "CCAE_IRRQ_BASE_BOUNDS_VIOLATION", - "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION", - "CCAE_IRRQ_INVALID_PD", - "CCAE_IRRQ_WRAP_ERROR", - "CCAE_CQ_SQ_COMPLETION_OVERFLOW", - "CCAE_CQ_RQ_COMPLETION_ERROR", - "CCAE_QP_SRQ_WQE_ERROR", - "CCAE_QP_LOCAL_CATASTROPHIC_ERROR", - "CCAE_CQ_OVERFLOW", - "CCAE_CQ_OPERATION_ERROR", - "CCAE_SRQ_LIMIT_REACHED", - "CCAE_QP_RQ_LIMIT_REACHED", - "CCAE_SRQ_CATASTROPHIC_ERROR", - "CCAE_RNIC_CATASTROPHIC_ERROR" - }; - - if (event < CCAE_REMOTE_SHUTDOWN || - event > CCAE_RNIC_CATASTROPHIC_ERROR) - return "<invalid event>"; - - event -= CCAE_REMOTE_SHUTDOWN; - return event_str[event]; -} - -static const char *to_qp_state_str(int state) -{ - switch (state) { - case C2_QP_STATE_IDLE: - return "C2_QP_STATE_IDLE"; - case C2_QP_STATE_CONNECTING: - return "C2_QP_STATE_CONNECTING"; - case C2_QP_STATE_RTS: - return "C2_QP_STATE_RTS"; - case C2_QP_STATE_CLOSING: - return "C2_QP_STATE_CLOSING"; - case C2_QP_STATE_TERMINATE: - return "C2_QP_STATE_TERMINATE"; - case C2_QP_STATE_ERROR: - return "C2_QP_STATE_ERROR"; - default: - return "<invalid QP state>"; - } -} - -void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) -{ - struct c2_mq *mq = c2dev->qptr_array[mq_index]; - union c2wr *wr; - void *resource_user_context; - struct iw_cm_event cm_event; - struct ib_event ib_event; - enum c2_resource_indicator resource_indicator; - enum c2_event_id event_id; - unsigned long flags; - int status; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr; - - /* - * retrieve the message - */ - wr = c2_mq_consume(mq); - if (!wr) - return; - - memset(&ib_event, 0, sizeof(ib_event)); - memset(&cm_event, 0, sizeof(cm_event)); - - event_id = c2_wr_get_id(wr); - resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type); - resource_user_context = - (void *) (unsigned long) wr->ae.ae_generic.user_context; - - status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr)); - - pr_debug("event received c2_dev=%p, event_id=%d, " - "resource_indicator=%d, user_context=%p, status = %d\n", - c2dev, event_id, resource_indicator, resource_user_context, - status); - - switch (resource_indicator) { - case C2_RES_IND_QP:{ - - struct c2_qp *qp = (struct c2_qp *)resource_user_context; - struct iw_cm_id *cm_id = qp->cm_id; - struct c2wr_ae_active_connect_results *res; - - if (!cm_id) { - pr_debug("event received, but cm_id is <nul>, qp=%p!\n", - qp); - goto ignore_it; - } - pr_debug("%s: event = %s, user_context=%llx, " - "resource_type=%x, " - "resource=%x, qp_state=%s\n", - __func__, - to_event_str(event_id), - (unsigned long long) wr->ae.ae_generic.user_context, - be32_to_cpu(wr->ae.ae_generic.resource_type), - be32_to_cpu(wr->ae.ae_generic.resource), - to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); - - c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state)); - - switch (event_id) { - case CCAE_ACTIVE_CONNECT_RESULTS: - res = &wr->ae.ae_active_connect_results; - cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - laddr->sin_addr.s_addr = res->laddr; - raddr->sin_addr.s_addr = res->raddr; - laddr->sin_port = res->lport; - raddr->sin_port = res->rport; - if (status == 0) { - cm_event.private_data_len = - be32_to_cpu(res->private_data_length); - cm_event.private_data = res->private_data; - } else { - spin_lock_irqsave(&qp->lock, flags); - if (qp->cm_id) { - qp->cm_id->rem_ref(qp->cm_id); - qp->cm_id = NULL; - } - spin_unlock_irqrestore(&qp->lock, flags); - cm_event.private_data_len = 0; - cm_event.private_data = NULL; - } - if (cm_id->event_handler) - cm_id->event_handler(cm_id, &cm_event); - break; - case CCAE_TERMINATE_MESSAGE_RECEIVED: - case CCAE_CQ_SQ_COMPLETION_OVERFLOW: - ib_event.device = &c2dev->ibdev; - ib_event.element.qp = &qp->ibqp; - ib_event.event = IB_EVENT_QP_REQ_ERR; - - if (qp->ibqp.event_handler) - qp->ibqp.event_handler(&ib_event, - qp->ibqp. - qp_context); - break; - case CCAE_BAD_CLOSE: - case CCAE_LLP_CLOSE_COMPLETE: - case CCAE_LLP_CONNECTION_RESET: - case CCAE_LLP_CONNECTION_LOST: - BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b); - - spin_lock_irqsave(&qp->lock, flags); - if (qp->cm_id) { - qp->cm_id->rem_ref(qp->cm_id); - qp->cm_id = NULL; - } - spin_unlock_irqrestore(&qp->lock, flags); - cm_event.event = IW_CM_EVENT_CLOSE; - cm_event.status = 0; - if (cm_id->event_handler) - cm_id->event_handler(cm_id, &cm_event); - break; - default: - BUG_ON(1); - pr_debug("%s:%d Unexpected event_id=%d on QP=%p, " - "CM_ID=%p\n", - __func__, __LINE__, - event_id, qp, cm_id); - break; - } - break; - } - - case C2_RES_IND_EP:{ - - struct c2wr_ae_connection_request *req = - &wr->ae.ae_connection_request; - struct iw_cm_id *cm_id = - (struct iw_cm_id *)resource_user_context; - - pr_debug("C2_RES_IND_EP event_id=%d\n", event_id); - if (event_id != CCAE_CONNECTION_REQUEST) { - pr_debug("%s: Invalid event_id: %d\n", - __func__, event_id); - break; - } - cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; - cm_event.provider_data = (void*)(unsigned long)req->cr_handle; - laddr->sin_addr.s_addr = req->laddr; - raddr->sin_addr.s_addr = req->raddr; - laddr->sin_port = req->lport; - raddr->sin_port = req->rport; - cm_event.private_data_len = - be32_to_cpu(req->private_data_length); - cm_event.private_data = req->private_data; - /* - * Until ird/ord negotiation via MPAv2 support is added, send - * max supported values - */ - cm_event.ird = cm_event.ord = 128; - - if (cm_id->event_handler) - cm_id->event_handler(cm_id, &cm_event); - break; - } - - case C2_RES_IND_CQ:{ - struct c2_cq *cq = - (struct c2_cq *) resource_user_context; - - pr_debug("IB_EVENT_CQ_ERR\n"); - ib_event.device = &c2dev->ibdev; - ib_event.element.cq = &cq->ibcq; - ib_event.event = IB_EVENT_CQ_ERR; - - if (cq->ibcq.event_handler) - cq->ibcq.event_handler(&ib_event, - cq->ibcq.cq_context); - break; - } - - default: - printk("Bad resource indicator = %d\n", - resource_indicator); - break; - } - - ignore_it: - c2_mq_free(mq); -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_ae.h b/kernel/drivers/infiniband/hw/amso1100/c2_ae.h deleted file mode 100644 index 3a065c33b..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_ae.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _C2_AE_H_ -#define _C2_AE_H_ - -/* - * WARNING: If you change this file, also bump C2_IVN_BASE - * in common/include/clustercore/c2_ivn.h. - */ - -/* - * Asynchronous Event Identifiers - * - * These start at 0x80 only so it's obvious from inspection that - * they are not work-request statuses. This isn't critical. - * - * NOTE: these event id's must fit in eight bits. - */ -enum c2_event_id { - CCAE_REMOTE_SHUTDOWN = 0x80, - CCAE_ACTIVE_CONNECT_RESULTS, - CCAE_CONNECTION_REQUEST, - CCAE_LLP_CLOSE_COMPLETE, - CCAE_TERMINATE_MESSAGE_RECEIVED, - CCAE_LLP_CONNECTION_RESET, - CCAE_LLP_CONNECTION_LOST, - CCAE_LLP_SEGMENT_SIZE_INVALID, - CCAE_LLP_INVALID_CRC, - CCAE_LLP_BAD_FPDU, - CCAE_INVALID_DDP_VERSION, - CCAE_INVALID_RDMA_VERSION, - CCAE_UNEXPECTED_OPCODE, - CCAE_INVALID_DDP_QUEUE_NUMBER, - CCAE_RDMA_READ_NOT_ENABLED, - CCAE_RDMA_WRITE_NOT_ENABLED, - CCAE_RDMA_READ_TOO_SMALL, - CCAE_NO_L_BIT, - CCAE_TAGGED_INVALID_STAG, - CCAE_TAGGED_BASE_BOUNDS_VIOLATION, - CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION, - CCAE_TAGGED_INVALID_PD, - CCAE_WRAP_ERROR, - CCAE_BAD_CLOSE, - CCAE_BAD_LLP_CLOSE, - CCAE_INVALID_MSN_RANGE, - CCAE_INVALID_MSN_GAP, - CCAE_IRRQ_OVERFLOW, - CCAE_IRRQ_MSN_GAP, - CCAE_IRRQ_MSN_RANGE, - CCAE_IRRQ_INVALID_STAG, - CCAE_IRRQ_BASE_BOUNDS_VIOLATION, - CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION, - CCAE_IRRQ_INVALID_PD, - CCAE_IRRQ_WRAP_ERROR, - CCAE_CQ_SQ_COMPLETION_OVERFLOW, - CCAE_CQ_RQ_COMPLETION_ERROR, - CCAE_QP_SRQ_WQE_ERROR, - CCAE_QP_LOCAL_CATASTROPHIC_ERROR, - CCAE_CQ_OVERFLOW, - CCAE_CQ_OPERATION_ERROR, - CCAE_SRQ_LIMIT_REACHED, - CCAE_QP_RQ_LIMIT_REACHED, - CCAE_SRQ_CATASTROPHIC_ERROR, - CCAE_RNIC_CATASTROPHIC_ERROR -/* WARNING If you add more id's, make sure their values fit in eight bits. */ -}; - -/* - * Resource Indicators and Identifiers - */ -enum c2_resource_indicator { - C2_RES_IND_QP = 1, - C2_RES_IND_EP, - C2_RES_IND_CQ, - C2_RES_IND_SRQ, -}; - -#endif /* _C2_AE_H_ */ diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_alloc.c b/kernel/drivers/infiniband/hw/amso1100/c2_alloc.c deleted file mode 100644 index 78d247ec6..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_alloc.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/errno.h> -#include <linux/bitmap.h> - -#include "c2.h" - -static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask, - struct sp_chunk **head) -{ - int i; - struct sp_chunk *new_head; - dma_addr_t dma_addr; - - new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE, - &dma_addr, gfp_mask); - if (new_head == NULL) - return -ENOMEM; - - new_head->dma_addr = dma_addr; - dma_unmap_addr_set(new_head, mapping, new_head->dma_addr); - - new_head->next = NULL; - new_head->head = 0; - - /* build list where each index is the next free slot */ - for (i = 0; - i < (PAGE_SIZE - sizeof(struct sp_chunk) - - sizeof(u16)) / sizeof(u16) - 1; - i++) { - new_head->shared_ptr[i] = i + 1; - } - /* terminate list */ - new_head->shared_ptr[i] = 0xFFFF; - - *head = new_head; - return 0; -} - -int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, - struct sp_chunk **root) -{ - return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root); -} - -void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root) -{ - struct sp_chunk *next; - - while (root) { - next = root->next; - dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root, - dma_unmap_addr(root, mapping)); - root = next; - } -} - -__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, - dma_addr_t *dma_addr, gfp_t gfp_mask) -{ - u16 mqsp; - - while (head) { - mqsp = head->head; - if (mqsp != 0xFFFF) { - head->head = head->shared_ptr[mqsp]; - break; - } else if (head->next == NULL) { - if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) == - 0) { - head = head->next; - mqsp = head->head; - head->head = head->shared_ptr[mqsp]; - break; - } else - return NULL; - } else - head = head->next; - } - if (head) { - *dma_addr = head->dma_addr + - ((unsigned long) &(head->shared_ptr[mqsp]) - - (unsigned long) head); - pr_debug("%s addr %p dma_addr %llx\n", __func__, - &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr); - return (__force __be16 *) &(head->shared_ptr[mqsp]); - } - return NULL; -} - -void c2_free_mqsp(__be16 *mqsp) -{ - struct sp_chunk *head; - u16 idx; - - /* The chunk containing this ptr begins at the page boundary */ - head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK); - - /* Link head to new mqsp */ - *mqsp = (__force __be16) head->head; - - /* Compute the shared_ptr index */ - idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1; - idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1; - - /* Point this index at the head */ - head->shared_ptr[idx] = head->head; - - /* Point head at this index */ - head->head = idx; -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_cm.c b/kernel/drivers/infiniband/hw/amso1100/c2_cm.c deleted file mode 100644 index 23bfa94fb..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_cm.c +++ /dev/null @@ -1,461 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ -#include <linux/slab.h> - -#include "c2.h" -#include "c2_wr.h" -#include "c2_vq.h" -#include <rdma/iw_cm.h> - -int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) -{ - struct c2_dev *c2dev = to_c2dev(cm_id->device); - struct ib_qp *ibqp; - struct c2_qp *qp; - struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */ - struct c2_vq_req *vq_req; - int err; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; - - if (cm_id->remote_addr.ss_family != AF_INET) - return -ENOSYS; - - ibqp = c2_get_qp(cm_id->device, iw_param->qpn); - if (!ibqp) - return -EINVAL; - qp = to_c2qp(ibqp); - - /* Associate QP <--> CM_ID */ - cm_id->provider_data = qp; - cm_id->add_ref(cm_id); - qp->cm_id = cm_id; - - /* - * only support the max private_data length - */ - if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { - err = -EINVAL; - goto bail0; - } - /* - * Set the rdma read limits - */ - err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); - if (err) - goto bail0; - - /* - * Create and send a WR_QP_CONNECT... - */ - wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); - if (!wr) { - err = -ENOMEM; - goto bail0; - } - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) { - err = -ENOMEM; - goto bail1; - } - - c2_wr_set_id(wr, CCWR_QP_CONNECT); - wr->hdr.context = 0; - wr->rnic_handle = c2dev->adapter_handle; - wr->qp_handle = qp->adapter_handle; - - wr->remote_addr = raddr->sin_addr.s_addr; - wr->remote_port = raddr->sin_port; - - /* - * Move any private data from the callers's buf into - * the WR. - */ - if (iw_param->private_data) { - wr->private_data_length = - cpu_to_be32(iw_param->private_data_len); - memcpy(&wr->private_data[0], iw_param->private_data, - iw_param->private_data_len); - } else - wr->private_data_length = 0; - - /* - * Send WR to adapter. NOTE: There is no synch reply from - * the adapter. - */ - err = vq_send_wr(c2dev, (union c2wr *) wr); - vq_req_free(c2dev, vq_req); - - bail1: - kfree(wr); - bail0: - if (err) { - /* - * If we fail, release reference on QP and - * disassociate QP from CM_ID - */ - cm_id->provider_data = NULL; - qp->cm_id = NULL; - cm_id->rem_ref(cm_id); - } - return err; -} - -int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog) -{ - struct c2_dev *c2dev; - struct c2wr_ep_listen_create_req wr; - struct c2wr_ep_listen_create_rep *reply; - struct c2_vq_req *vq_req; - int err; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; - - if (cm_id->local_addr.ss_family != AF_INET) - return -ENOSYS; - - c2dev = to_c2dev(cm_id->device); - if (c2dev == NULL) - return -EINVAL; - - /* - * Allocate verbs request. - */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - /* - * Build the WR - */ - c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE); - wr.hdr.context = (u64) (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.local_addr = laddr->sin_addr.s_addr; - wr.local_port = laddr->sin_port; - wr.backlog = cpu_to_be32(backlog); - wr.user_context = (u64) (unsigned long) cm_id; - - /* - * Reference the request struct. Dereferenced in the int handler. - */ - vq_req_get(c2dev, vq_req); - - /* - * Send WR to adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - /* - * Wait for reply from adapter - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail0; - - /* - * Process reply - */ - reply = - (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail1; - } - - if ((err = c2_errno(reply)) != 0) - goto bail1; - - /* - * Keep the adapter handle. Used in subsequent destroy - */ - cm_id->provider_data = (void*)(unsigned long) reply->ep_handle; - - /* - * free vq stuff - */ - vq_repbuf_free(c2dev, reply); - vq_req_free(c2dev, vq_req); - - return 0; - - bail1: - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - - -int c2_llp_service_destroy(struct iw_cm_id *cm_id) -{ - - struct c2_dev *c2dev; - struct c2wr_ep_listen_destroy_req wr; - struct c2wr_ep_listen_destroy_rep *reply; - struct c2_vq_req *vq_req; - int err; - - c2dev = to_c2dev(cm_id->device); - if (c2dev == NULL) - return -EINVAL; - - /* - * Allocate verbs request. - */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - /* - * Build the WR - */ - c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.ep_handle = (u32)(unsigned long)cm_id->provider_data; - - /* - * reference the request struct. dereferenced in the int handler. - */ - vq_req_get(c2dev, vq_req); - - /* - * Send WR to adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - /* - * Wait for reply from adapter - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail0; - - /* - * Process reply - */ - reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail0; - } - if ((err = c2_errno(reply)) != 0) - goto bail1; - - bail1: - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) -{ - struct c2_dev *c2dev = to_c2dev(cm_id->device); - struct c2_qp *qp; - struct ib_qp *ibqp; - struct c2wr_cr_accept_req *wr; /* variable length WR */ - struct c2_vq_req *vq_req; - struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */ - int err; - - ibqp = c2_get_qp(cm_id->device, iw_param->qpn); - if (!ibqp) - return -EINVAL; - qp = to_c2qp(ibqp); - - /* Set the RDMA read limits */ - err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); - if (err) - goto bail0; - - /* Allocate verbs request. */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) { - err = -ENOMEM; - goto bail0; - } - vq_req->qp = qp; - vq_req->cm_id = cm_id; - vq_req->event = IW_CM_EVENT_ESTABLISHED; - - wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); - if (!wr) { - err = -ENOMEM; - goto bail1; - } - - /* Build the WR */ - c2_wr_set_id(wr, CCWR_CR_ACCEPT); - wr->hdr.context = (unsigned long) vq_req; - wr->rnic_handle = c2dev->adapter_handle; - wr->ep_handle = (u32) (unsigned long) cm_id->provider_data; - wr->qp_handle = qp->adapter_handle; - - /* Replace the cr_handle with the QP after accept */ - cm_id->provider_data = qp; - cm_id->add_ref(cm_id); - qp->cm_id = cm_id; - - cm_id->provider_data = qp; - - /* Validate private_data length */ - if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { - err = -EINVAL; - goto bail1; - } - - if (iw_param->private_data) { - wr->private_data_length = cpu_to_be32(iw_param->private_data_len); - memcpy(&wr->private_data[0], - iw_param->private_data, iw_param->private_data_len); - } else - wr->private_data_length = 0; - - /* Reference the request struct. Dereferenced in the int handler. */ - vq_req_get(c2dev, vq_req); - - /* Send WR to adapter */ - err = vq_send_wr(c2dev, (union c2wr *) wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail1; - } - - /* Wait for reply from adapter */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail1; - - /* Check that reply is present */ - reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail1; - } - - err = c2_errno(reply); - vq_repbuf_free(c2dev, reply); - - if (!err) - c2_set_qp_state(qp, C2_QP_STATE_RTS); - bail1: - kfree(wr); - vq_req_free(c2dev, vq_req); - bail0: - if (err) { - /* - * If we fail, release reference on QP and - * disassociate QP from CM_ID - */ - cm_id->provider_data = NULL; - qp->cm_id = NULL; - cm_id->rem_ref(cm_id); - } - return err; -} - -int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct c2_dev *c2dev; - struct c2wr_cr_reject_req wr; - struct c2_vq_req *vq_req; - struct c2wr_cr_reject_rep *reply; - int err; - - c2dev = to_c2dev(cm_id->device); - - /* - * Allocate verbs request. - */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - /* - * Build the WR - */ - c2_wr_set_id(&wr, CCWR_CR_REJECT); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.ep_handle = (u32) (unsigned long) cm_id->provider_data; - - /* - * reference the request struct. dereferenced in the int handler. - */ - vq_req_get(c2dev, vq_req); - - /* - * Send WR to adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - /* - * Wait for reply from adapter - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail0; - - /* - * Process reply - */ - reply = (struct c2wr_cr_reject_rep *) (unsigned long) - vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail0; - } - err = c2_errno(reply); - /* - * free vq stuff - */ - vq_repbuf_free(c2dev, reply); - - bail0: - vq_req_free(c2dev, vq_req); - return err; -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_cq.c b/kernel/drivers/infiniband/hw/amso1100/c2_cq.c deleted file mode 100644 index 1b63185b4..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_cq.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2004 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ -#include <linux/gfp.h> - -#include "c2.h" -#include "c2_vq.h" -#include "c2_status.h" - -#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1)) - -static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn) -{ - struct c2_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&c2dev->lock, flags); - cq = c2dev->qptr_array[cqn]; - if (!cq) { - spin_unlock_irqrestore(&c2dev->lock, flags); - return NULL; - } - atomic_inc(&cq->refcount); - spin_unlock_irqrestore(&c2dev->lock, flags); - return cq; -} - -static void c2_cq_put(struct c2_cq *cq) -{ - if (atomic_dec_and_test(&cq->refcount)) - wake_up(&cq->wait); -} - -void c2_cq_event(struct c2_dev *c2dev, u32 mq_index) -{ - struct c2_cq *cq; - - cq = c2_cq_get(c2dev, mq_index); - if (!cq) { - printk("discarding events on destroyed CQN=%d\n", mq_index); - return; - } - - (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); - c2_cq_put(cq); -} - -void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index) -{ - struct c2_cq *cq; - struct c2_mq *q; - - cq = c2_cq_get(c2dev, mq_index); - if (!cq) - return; - - spin_lock_irq(&cq->lock); - q = &cq->mq; - if (q && !c2_mq_empty(q)) { - u16 priv = q->priv; - struct c2wr_ce *msg; - - while (priv != be16_to_cpu(*q->shared)) { - msg = (struct c2wr_ce *) - (q->msg_pool.host + priv * q->msg_size); - if (msg->qp_user_context == (u64) (unsigned long) qp) { - msg->qp_user_context = (u64) 0; - } - priv = (priv + 1) % q->q_size; - } - } - spin_unlock_irq(&cq->lock); - c2_cq_put(cq); -} - -static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status) -{ - switch (status) { - case C2_OK: - return IB_WC_SUCCESS; - case CCERR_FLUSHED: - return IB_WC_WR_FLUSH_ERR; - case CCERR_BASE_AND_BOUNDS_VIOLATION: - return IB_WC_LOC_PROT_ERR; - case CCERR_ACCESS_VIOLATION: - return IB_WC_LOC_ACCESS_ERR; - case CCERR_TOTAL_LENGTH_TOO_BIG: - return IB_WC_LOC_LEN_ERR; - case CCERR_INVALID_WINDOW: - return IB_WC_MW_BIND_ERR; - default: - return IB_WC_GENERAL_ERR; - } -} - - -static inline int c2_poll_one(struct c2_dev *c2dev, - struct c2_cq *cq, struct ib_wc *entry) -{ - struct c2wr_ce *ce; - struct c2_qp *qp; - int is_recv = 0; - - ce = c2_mq_consume(&cq->mq); - if (!ce) { - return -EAGAIN; - } - - /* - * if the qp returned is null then this qp has already - * been freed and we are unable process the completion. - * try pulling the next message - */ - while ((qp = - (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) { - c2_mq_free(&cq->mq); - ce = c2_mq_consume(&cq->mq); - if (!ce) - return -EAGAIN; - } - - entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce)); - entry->wr_id = ce->hdr.context; - entry->qp = &qp->ibqp; - entry->wc_flags = 0; - entry->slid = 0; - entry->sl = 0; - entry->src_qp = 0; - entry->dlid_path_bits = 0; - entry->pkey_index = 0; - - switch (c2_wr_get_id(ce)) { - case C2_WR_TYPE_SEND: - entry->opcode = IB_WC_SEND; - break; - case C2_WR_TYPE_RDMA_WRITE: - entry->opcode = IB_WC_RDMA_WRITE; - break; - case C2_WR_TYPE_RDMA_READ: - entry->opcode = IB_WC_RDMA_READ; - break; - case C2_WR_TYPE_BIND_MW: - entry->opcode = IB_WC_BIND_MW; - break; - case C2_WR_TYPE_RECV: - entry->byte_len = be32_to_cpu(ce->bytes_rcvd); - entry->opcode = IB_WC_RECV; - is_recv = 1; - break; - default: - break; - } - - /* consume the WQEs */ - if (is_recv) - c2_mq_lconsume(&qp->rq_mq, 1); - else - c2_mq_lconsume(&qp->sq_mq, - be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1); - - /* free the message */ - c2_mq_free(&cq->mq); - - return 0; -} - -int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) -{ - struct c2_dev *c2dev = to_c2dev(ibcq->device); - struct c2_cq *cq = to_c2cq(ibcq); - unsigned long flags; - int npolled, err; - - spin_lock_irqsave(&cq->lock, flags); - - for (npolled = 0; npolled < num_entries; ++npolled) { - - err = c2_poll_one(c2dev, cq, entry + npolled); - if (err) - break; - } - - spin_unlock_irqrestore(&cq->lock, flags); - - return npolled; -} - -int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) -{ - struct c2_mq_shared __iomem *shared; - struct c2_cq *cq; - unsigned long flags; - int ret = 0; - - cq = to_c2cq(ibcq); - shared = cq->mq.peer; - - if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) - writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type); - else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type); - else - return -EINVAL; - - writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed); - - /* - * Now read back shared->armed to make the PCI - * write synchronous. This is necessary for - * correct cq notification semantics. - */ - readb(&shared->armed); - - if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { - spin_lock_irqsave(&cq->lock, flags); - ret = !c2_mq_empty(&cq->mq); - spin_unlock_irqrestore(&cq->lock, flags); - } - - return ret; -} - -static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) -{ - dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size, - mq->msg_pool.host, dma_unmap_addr(mq, mapping)); -} - -static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, - size_t q_size, size_t msg_size) -{ - u8 *pool_start; - - if (q_size > SIZE_MAX / msg_size) - return -EINVAL; - - pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, - &mq->host_dma, GFP_KERNEL); - if (!pool_start) - return -ENOMEM; - - c2_mq_rep_init(mq, - 0, /* index (currently unknown) */ - q_size, - msg_size, - pool_start, - NULL, /* peer (currently unknown) */ - C2_MQ_HOST_TARGET); - - dma_unmap_addr_set(mq, mapping, mq->host_dma); - - return 0; -} - -int c2_init_cq(struct c2_dev *c2dev, int entries, - struct c2_ucontext *ctx, struct c2_cq *cq) -{ - struct c2wr_cq_create_req wr; - struct c2wr_cq_create_rep *reply; - unsigned long peer_pa; - struct c2_vq_req *vq_req; - int err; - - might_sleep(); - - cq->ibcq.cqe = entries - 1; - cq->is_kernel = !ctx; - - /* Allocate a shared pointer */ - cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &cq->mq.shared_dma, GFP_KERNEL); - if (!cq->mq.shared) - return -ENOMEM; - - /* Allocate pages for the message pool */ - err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE); - if (err) - goto bail0; - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) { - err = -ENOMEM; - goto bail1; - } - - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_CQ_CREATE); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.msg_size = cpu_to_be32(cq->mq.msg_size); - wr.depth = cpu_to_be32(cq->mq.q_size); - wr.shared_ht = cpu_to_be64(cq->mq.shared_dma); - wr.msg_pool = cpu_to_be64(cq->mq.host_dma); - wr.user_context = (u64) (unsigned long) (cq); - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail2; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail2; - - reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail2; - } - - if ((err = c2_errno(reply)) != 0) - goto bail3; - - cq->adapter_handle = reply->cq_handle; - cq->mq.index = be32_to_cpu(reply->mq_index); - - peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared); - cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE); - if (!cq->mq.peer) { - err = -ENOMEM; - goto bail3; - } - - vq_repbuf_free(c2dev, reply); - vq_req_free(c2dev, vq_req); - - spin_lock_init(&cq->lock); - atomic_set(&cq->refcount, 1); - init_waitqueue_head(&cq->wait); - - /* - * Use the MQ index allocated by the adapter to - * store the CQ in the qptr_array - */ - cq->cqn = cq->mq.index; - c2dev->qptr_array[cq->cqn] = cq; - - return 0; - - bail3: - vq_repbuf_free(c2dev, reply); - bail2: - vq_req_free(c2dev, vq_req); - bail1: - c2_free_cq_buf(c2dev, &cq->mq); - bail0: - c2_free_mqsp(cq->mq.shared); - - return err; -} - -void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq) -{ - int err; - struct c2_vq_req *vq_req; - struct c2wr_cq_destroy_req wr; - struct c2wr_cq_destroy_rep *reply; - - might_sleep(); - - /* Clear CQ from the qptr array */ - spin_lock_irq(&c2dev->lock); - c2dev->qptr_array[cq->mq.index] = NULL; - atomic_dec(&cq->refcount); - spin_unlock_irq(&c2dev->lock); - - wait_event(cq->wait, !atomic_read(&cq->refcount)); - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) { - goto bail0; - } - - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_CQ_DESTROY); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.cq_handle = cq->adapter_handle; - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail1; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail1; - - reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg); - if (reply) - vq_repbuf_free(c2dev, reply); - bail1: - vq_req_free(c2dev, vq_req); - bail0: - if (cq->is_kernel) { - c2_free_cq_buf(c2dev, &cq->mq); - } - - return; -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_intr.c b/kernel/drivers/infiniband/hw/amso1100/c2_intr.c deleted file mode 100644 index 3a17d9b36..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_intr.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "c2.h" -#include <rdma/iw_cm.h> -#include "c2_vq.h" - -static void handle_mq(struct c2_dev *c2dev, u32 index); -static void handle_vq(struct c2_dev *c2dev, u32 mq_index); - -/* - * Handle RNIC interrupts - */ -void c2_rnic_interrupt(struct c2_dev *c2dev) -{ - unsigned int mq_index; - - while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) { - mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT); - if (mq_index & 0x80000000) { - break; - } - - c2dev->hints_read++; - handle_mq(c2dev, mq_index); - } - -} - -/* - * Top level MQ handler - */ -static void handle_mq(struct c2_dev *c2dev, u32 mq_index) -{ - if (c2dev->qptr_array[mq_index] == NULL) { - pr_debug("handle_mq: stray activity for mq_index=%d\n", - mq_index); - return; - } - - switch (mq_index) { - case (0): - /* - * An index of 0 in the activity queue - * indicates the req vq now has messages - * available... - * - * Wake up any waiters waiting on req VQ - * message availability. - */ - wake_up(&c2dev->req_vq_wo); - break; - case (1): - handle_vq(c2dev, mq_index); - break; - case (2): - /* We have to purge the VQ in case there are pending - * accept reply requests that would result in the - * generation of an ESTABLISHED event. If we don't - * generate these first, a CLOSE event could end up - * being delivered before the ESTABLISHED event. - */ - handle_vq(c2dev, 1); - - c2_ae_event(c2dev, mq_index); - break; - default: - /* There is no event synchronization between CQ events - * and AE or CM events. In fact, CQE could be - * delivered for all of the I/O up to and including the - * FLUSH for a peer disconenct prior to the ESTABLISHED - * event being delivered to the app. The reason for this - * is that CM events are delivered on a thread, while AE - * and CM events are delivered on interrupt context. - */ - c2_cq_event(c2dev, mq_index); - break; - } - - return; -} - -/* - * Handles verbs WR replies. - */ -static void handle_vq(struct c2_dev *c2dev, u32 mq_index) -{ - void *adapter_msg, *reply_msg; - struct c2wr_hdr *host_msg; - struct c2wr_hdr tmp; - struct c2_mq *reply_vq; - struct c2_vq_req *req; - struct iw_cm_event cm_event; - int err; - - reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index]; - - /* - * get next msg from mq_index into adapter_msg. - * don't free it yet. - */ - adapter_msg = c2_mq_consume(reply_vq); - if (adapter_msg == NULL) { - return; - } - - host_msg = vq_repbuf_alloc(c2dev); - - /* - * If we can't get a host buffer, then we'll still - * wakeup the waiter, we just won't give him the msg. - * It is assumed the waiter will deal with this... - */ - if (!host_msg) { - pr_debug("handle_vq: no repbufs!\n"); - - /* - * just copy the WR header into a local variable. - * this allows us to still demux on the context - */ - host_msg = &tmp; - memcpy(host_msg, adapter_msg, sizeof(tmp)); - reply_msg = NULL; - } else { - memcpy(host_msg, adapter_msg, reply_vq->msg_size); - reply_msg = host_msg; - } - - /* - * consume the msg from the MQ - */ - c2_mq_free(reply_vq); - - /* - * wakeup the waiter. - */ - req = (struct c2_vq_req *) (unsigned long) host_msg->context; - if (req == NULL) { - /* - * We should never get here, as the adapter should - * never send us a reply that we're not expecting. - */ - if (reply_msg != NULL) - vq_repbuf_free(c2dev, host_msg); - pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n"); - return; - } - - if (reply_msg) - err = c2_errno(reply_msg); - else - err = -ENOMEM; - - if (!err) switch (req->event) { - case IW_CM_EVENT_ESTABLISHED: - c2_set_qp_state(req->qp, - C2_QP_STATE_RTS); - /* - * Until ird/ord negotiation via MPAv2 support is added, send - * max supported values - */ - cm_event.ird = cm_event.ord = 128; - case IW_CM_EVENT_CLOSE: - - /* - * Move the QP to RTS if this is - * the established event - */ - cm_event.event = req->event; - cm_event.status = 0; - cm_event.local_addr = req->cm_id->local_addr; - cm_event.remote_addr = req->cm_id->remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - req->cm_id->event_handler(req->cm_id, &cm_event); - break; - default: - break; - } - - req->reply_msg = (u64) (unsigned long) (reply_msg); - atomic_set(&req->reply_ready, 1); - wake_up(&req->wait_object); - - /* - * If the request was cancelled, then this put will - * free the vq_req memory...and reply_msg!!! - */ - vq_req_put(c2dev, req); -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_mm.c b/kernel/drivers/infiniband/hw/amso1100/c2_mm.c deleted file mode 100644 index 119c4f3d9..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_mm.c +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/slab.h> - -#include "c2.h" -#include "c2_vq.h" - -#define PBL_VIRT 1 -#define PBL_PHYS 2 - -/* - * Send all the PBL messages to convey the remainder of the PBL - * Wait for the adapter's reply on the last one. - * This is indicated by setting the MEM_PBL_COMPLETE in the flags. - * - * NOTE: vq_req is _not_ freed by this function. The VQ Host - * Reply buffer _is_ freed by this function. - */ -static int -send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index, - unsigned long va, u32 pbl_depth, - struct c2_vq_req *vq_req, int pbl_type) -{ - u32 pbe_count; /* amt that fits in a PBL msg */ - u32 count; /* amt in this PBL MSG. */ - struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */ - struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */ - int err, pbl_virt, pbl_index, i; - - switch (pbl_type) { - case PBL_VIRT: - pbl_virt = 1; - break; - case PBL_PHYS: - pbl_virt = 0; - break; - default: - return -EINVAL; - break; - } - - pbe_count = (c2dev->req_vq.msg_size - - sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64); - wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); - if (!wr) { - return -ENOMEM; - } - c2_wr_set_id(wr, CCWR_NSMR_PBL); - - /* - * Only the last PBL message will generate a reply from the verbs, - * so we set the context to 0 indicating there is no kernel verbs - * handler blocked awaiting this reply. - */ - wr->hdr.context = 0; - wr->rnic_handle = c2dev->adapter_handle; - wr->stag_index = stag_index; /* already swapped */ - wr->flags = 0; - pbl_index = 0; - while (pbl_depth) { - count = min(pbe_count, pbl_depth); - wr->addrs_length = cpu_to_be32(count); - - /* - * If this is the last message, then reference the - * vq request struct cuz we're gonna wait for a reply. - * also make this PBL msg as the last one. - */ - if (count == pbl_depth) { - /* - * reference the request struct. dereferenced in the - * int handler. - */ - vq_req_get(c2dev, vq_req); - wr->flags = cpu_to_be32(MEM_PBL_COMPLETE); - - /* - * This is the last PBL message. - * Set the context to our VQ Request Object so we can - * wait for the reply. - */ - wr->hdr.context = (unsigned long) vq_req; - } - - /* - * If pbl_virt is set then va is a virtual address - * that describes a virtually contiguous memory - * allocation. The wr needs the start of each virtual page - * to be converted to the corresponding physical address - * of the page. If pbl_virt is not set then va is an array - * of physical addresses and there is no conversion to do. - * Just fill in the wr with what is in the array. - */ - for (i = 0; i < count; i++) { - if (pbl_virt) { - va += PAGE_SIZE; - } else { - wr->paddrs[i] = - cpu_to_be64(((u64 *)va)[pbl_index + i]); - } - } - - /* - * Send WR to adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) wr); - if (err) { - if (count <= pbe_count) { - vq_req_put(c2dev, vq_req); - } - goto bail0; - } - pbl_depth -= count; - pbl_index += count; - } - - /* - * Now wait for the reply... - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail0; - } - - /* - * Process reply - */ - reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - err = c2_errno(reply); - - vq_repbuf_free(c2dev, reply); - bail0: - kfree(wr); - return err; -} - -#define C2_PBL_MAX_DEPTH 131072 -int -c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, - int page_size, int pbl_depth, u32 length, - u32 offset, u64 *va, enum c2_acf acf, - struct c2_mr *mr) -{ - struct c2_vq_req *vq_req; - struct c2wr_nsmr_register_req *wr; - struct c2wr_nsmr_register_rep *reply; - u16 flags; - int i, pbe_count, count; - int err; - - if (!va || !length || !addr_list || !pbl_depth) - return -EINTR; - - /* - * Verify PBL depth is within rnic max - */ - if (pbl_depth > C2_PBL_MAX_DEPTH) { - return -EINTR; - } - - /* - * allocate verbs request object - */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); - if (!wr) { - err = -ENOMEM; - goto bail0; - } - - /* - * build the WR - */ - c2_wr_set_id(wr, CCWR_NSMR_REGISTER); - wr->hdr.context = (unsigned long) vq_req; - wr->rnic_handle = c2dev->adapter_handle; - - flags = (acf | MEM_VA_BASED | MEM_REMOTE); - - /* - * compute how many pbes can fit in the message - */ - pbe_count = (c2dev->req_vq.msg_size - - sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64); - - if (pbl_depth <= pbe_count) { - flags |= MEM_PBL_COMPLETE; - } - wr->flags = cpu_to_be16(flags); - wr->stag_key = 0; //stag_key; - wr->va = cpu_to_be64(*va); - wr->pd_id = mr->pd->pd_id; - wr->pbe_size = cpu_to_be32(page_size); - wr->length = cpu_to_be32(length); - wr->pbl_depth = cpu_to_be32(pbl_depth); - wr->fbo = cpu_to_be32(offset); - count = min(pbl_depth, pbe_count); - wr->addrs_length = cpu_to_be32(count); - - /* - * fill out the PBL for this message - */ - for (i = 0; i < count; i++) { - wr->paddrs[i] = cpu_to_be64(addr_list[i]); - } - - /* - * regerence the request struct - */ - vq_req_get(c2dev, vq_req); - - /* - * send the WR to the adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail1; - } - - /* - * wait for reply from adapter - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail1; - } - - /* - * process reply - */ - reply = - (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail1; - } - if ((err = c2_errno(reply))) { - goto bail2; - } - //*p_pb_entries = be32_to_cpu(reply->pbl_depth); - mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index); - vq_repbuf_free(c2dev, reply); - - /* - * if there are still more PBEs we need to send them to - * the adapter and wait for a reply on the final one. - * reuse vq_req for this purpose. - */ - pbl_depth -= count; - if (pbl_depth) { - - vq_req->reply_msg = (unsigned long) NULL; - atomic_set(&vq_req->reply_ready, 0); - err = send_pbl_messages(c2dev, - cpu_to_be32(mr->ibmr.lkey), - (unsigned long) &addr_list[i], - pbl_depth, vq_req, PBL_PHYS); - if (err) { - goto bail1; - } - } - - vq_req_free(c2dev, vq_req); - kfree(wr); - - return err; - - bail2: - vq_repbuf_free(c2dev, reply); - bail1: - kfree(wr); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index) -{ - struct c2_vq_req *vq_req; /* verbs request object */ - struct c2wr_stag_dealloc_req wr; /* work request */ - struct c2wr_stag_dealloc_rep *reply; /* WR reply */ - int err; - - - /* - * allocate verbs request object - */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) { - return -ENOMEM; - } - - /* - * Build the WR - */ - c2_wr_set_id(&wr, CCWR_STAG_DEALLOC); - wr.hdr.context = (u64) (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.stag_index = cpu_to_be32(stag_index); - - /* - * reference the request struct. dereferenced in the int handler. - */ - vq_req_get(c2dev, vq_req); - - /* - * Send WR to adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - /* - * Wait for reply from adapter - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail0; - } - - /* - * Process reply - */ - reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - err = c2_errno(reply); - - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_mq.c b/kernel/drivers/infiniband/hw/amso1100/c2_mq.c deleted file mode 100644 index 0cddc49be..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_mq.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "c2.h" -#include "c2_mq.h" - -void *c2_mq_alloc(struct c2_mq *q) -{ - BUG_ON(q->magic != C2_MQ_MAGIC); - BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); - - if (c2_mq_full(q)) { - return NULL; - } else { -#ifdef DEBUG - struct c2wr_hdr *m = - (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size); -#ifdef CCMSGMAGIC - BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC)); - m->magic = cpu_to_be32(CCWR_MAGIC); -#endif - return m; -#else - return q->msg_pool.host + q->priv * q->msg_size; -#endif - } -} - -void c2_mq_produce(struct c2_mq *q) -{ - BUG_ON(q->magic != C2_MQ_MAGIC); - BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); - - if (!c2_mq_full(q)) { - q->priv = (q->priv + 1) % q->q_size; - q->hint_count++; - /* Update peer's offset. */ - __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared); - } -} - -void *c2_mq_consume(struct c2_mq *q) -{ - BUG_ON(q->magic != C2_MQ_MAGIC); - BUG_ON(q->type != C2_MQ_HOST_TARGET); - - if (c2_mq_empty(q)) { - return NULL; - } else { -#ifdef DEBUG - struct c2wr_hdr *m = (struct c2wr_hdr *) - (q->msg_pool.host + q->priv * q->msg_size); -#ifdef CCMSGMAGIC - BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC)); -#endif - return m; -#else - return q->msg_pool.host + q->priv * q->msg_size; -#endif - } -} - -void c2_mq_free(struct c2_mq *q) -{ - BUG_ON(q->magic != C2_MQ_MAGIC); - BUG_ON(q->type != C2_MQ_HOST_TARGET); - - if (!c2_mq_empty(q)) { - -#ifdef CCMSGMAGIC - { - struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *) - (q->msg_pool.adapter + q->priv * q->msg_size); - __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic); - } -#endif - q->priv = (q->priv + 1) % q->q_size; - /* Update peer's offset. */ - __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared); - } -} - - -void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count) -{ - BUG_ON(q->magic != C2_MQ_MAGIC); - BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); - - while (wqe_count--) { - BUG_ON(c2_mq_empty(q)); - *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size); - } -} - -#if 0 -u32 c2_mq_count(struct c2_mq *q) -{ - s32 count; - - if (q->type == C2_MQ_HOST_TARGET) - count = be16_to_cpu(*q->shared) - q->priv; - else - count = q->priv - be16_to_cpu(*q->shared); - - if (count < 0) - count += q->q_size; - - return (u32) count; -} -#endif /* 0 */ - -void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, - u8 __iomem *pool_start, u16 __iomem *peer, u32 type) -{ - BUG_ON(!q->shared); - - /* This code assumes the byte swapping has already been done! */ - q->index = index; - q->q_size = q_size; - q->msg_size = msg_size; - q->msg_pool.adapter = pool_start; - q->peer = (struct c2_mq_shared __iomem *) peer; - q->magic = C2_MQ_MAGIC; - q->type = type; - q->priv = 0; - q->hint_count = 0; - return; -} -void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, - u8 *pool_start, u16 __iomem *peer, u32 type) -{ - BUG_ON(!q->shared); - - /* This code assumes the byte swapping has already been done! */ - q->index = index; - q->q_size = q_size; - q->msg_size = msg_size; - q->msg_pool.host = pool_start; - q->peer = (struct c2_mq_shared __iomem *) peer; - q->magic = C2_MQ_MAGIC; - q->type = type; - q->priv = 0; - q->hint_count = 0; - return; -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_mq.h b/kernel/drivers/infiniband/hw/amso1100/c2_mq.h deleted file mode 100644 index fc1b9a7ce..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_mq.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _C2_MQ_H_ -#define _C2_MQ_H_ -#include <linux/kernel.h> -#include <linux/dma-mapping.h> -#include "c2_wr.h" - -enum c2_shared_regs { - - C2_SHARED_ARMED = 0x10, - C2_SHARED_NOTIFY = 0x18, - C2_SHARED_SHARED = 0x40, -}; - -struct c2_mq_shared { - u16 unused1; - u8 armed; - u8 notification_type; - u32 unused2; - u16 shared; - /* Pad to 64 bytes. */ - u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)]; -}; - -enum c2_mq_type { - C2_MQ_HOST_TARGET = 1, - C2_MQ_ADAPTER_TARGET = 2, -}; - -/* - * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ. - * c2_user_mq_t (which is the same format) is for user-mode MQs... - */ -#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */ -struct c2_mq { - u32 magic; - union { - u8 *host; - u8 __iomem *adapter; - } msg_pool; - dma_addr_t host_dma; - DEFINE_DMA_UNMAP_ADDR(mapping); - u16 hint_count; - u16 priv; - struct c2_mq_shared __iomem *peer; - __be16 *shared; - dma_addr_t shared_dma; - u32 q_size; - u32 msg_size; - u32 index; - enum c2_mq_type type; -}; - -static __inline__ int c2_mq_empty(struct c2_mq *q) -{ - return q->priv == be16_to_cpu(*q->shared); -} - -static __inline__ int c2_mq_full(struct c2_mq *q) -{ - return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size; -} - -extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count); -extern void *c2_mq_alloc(struct c2_mq *q); -extern void c2_mq_produce(struct c2_mq *q); -extern void *c2_mq_consume(struct c2_mq *q); -extern void c2_mq_free(struct c2_mq *q); -extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, - u8 __iomem *pool_start, u16 __iomem *peer, u32 type); -extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, - u8 *pool_start, u16 __iomem *peer, u32 type); - -#endif /* _C2_MQ_H_ */ diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_pd.c b/kernel/drivers/infiniband/hw/amso1100/c2_pd.c deleted file mode 100644 index f3e81dc35..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_pd.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/errno.h> - -#include "c2.h" -#include "c2_provider.h" - -int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd) -{ - u32 obj; - int ret = 0; - - spin_lock(&c2dev->pd_table.lock); - obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max, - c2dev->pd_table.last); - if (obj >= c2dev->pd_table.max) - obj = find_first_zero_bit(c2dev->pd_table.table, - c2dev->pd_table.max); - if (obj < c2dev->pd_table.max) { - pd->pd_id = obj; - __set_bit(obj, c2dev->pd_table.table); - c2dev->pd_table.last = obj+1; - if (c2dev->pd_table.last >= c2dev->pd_table.max) - c2dev->pd_table.last = 0; - } else - ret = -ENOMEM; - spin_unlock(&c2dev->pd_table.lock); - return ret; -} - -void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd) -{ - spin_lock(&c2dev->pd_table.lock); - __clear_bit(pd->pd_id, c2dev->pd_table.table); - spin_unlock(&c2dev->pd_table.lock); -} - -int c2_init_pd_table(struct c2_dev *c2dev) -{ - - c2dev->pd_table.last = 0; - c2dev->pd_table.max = c2dev->props.max_pd; - spin_lock_init(&c2dev->pd_table.lock); - c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) * - sizeof(long), GFP_KERNEL); - if (!c2dev->pd_table.table) - return -ENOMEM; - bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd); - return 0; -} - -void c2_cleanup_pd_table(struct c2_dev *c2dev) -{ - kfree(c2dev->pd_table.table); -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_provider.c b/kernel/drivers/infiniband/hw/amso1100/c2_provider.c deleted file mode 100644 index bdf350781..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_provider.c +++ /dev/null @@ -1,882 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/pci.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/inetdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/if_vlan.h> -#include <linux/crc32.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/init.h> -#include <linux/dma-mapping.h> -#include <linux/if_arp.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> - -#include <rdma/ib_smi.h> -#include <rdma/ib_umem.h> -#include <rdma/ib_user_verbs.h> -#include "c2.h" -#include "c2_provider.h" -#include "c2_user.h" - -static int c2_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) -{ - struct c2_dev *c2dev = to_c2dev(ibdev); - - pr_debug("%s:%u\n", __func__, __LINE__); - - *props = c2dev->props; - return 0; -} - -static int c2_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - - props->max_mtu = IB_MTU_4096; - props->lid = 0; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; - props->state = IB_PORT_ACTIVE; - props->phys_state = 0; - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; - props->active_width = 1; - props->active_speed = IB_SPEED_SDR; - - return 0; -} - -static int c2_query_pkey(struct ib_device *ibdev, - u8 port, u16 index, u16 * pkey) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - *pkey = 0; - return 0; -} - -static int c2_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct c2_dev *c2dev = to_c2dev(ibdev); - - pr_debug("%s:%u\n", __func__, __LINE__); - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6); - - return 0; -} - -/* Allocate the user context data structure. This keeps track - * of all objects associated with a particular user-mode client. - */ -static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) -{ - struct c2_ucontext *context; - - pr_debug("%s:%u\n", __func__, __LINE__); - context = kmalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - - return &context->ibucontext; -} - -static int c2_dealloc_ucontext(struct ib_ucontext *context) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - kfree(context); - return 0; -} - -static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return -ENOSYS; -} - -static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct c2_pd *pd; - int err; - - pr_debug("%s:%u\n", __func__, __LINE__); - - pd = kmalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - - err = c2_pd_alloc(to_c2dev(ibdev), !context, pd); - if (err) { - kfree(pd); - return ERR_PTR(err); - } - - if (context) { - if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) { - c2_pd_free(to_c2dev(ibdev), pd); - kfree(pd); - return ERR_PTR(-EFAULT); - } - } - - return &pd->ibpd; -} - -static int c2_dealloc_pd(struct ib_pd *pd) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - c2_pd_free(to_c2dev(pd->device), to_c2pd(pd)); - kfree(pd); - - return 0; -} - -static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return ERR_PTR(-ENOSYS); -} - -static int c2_ah_destroy(struct ib_ah *ah) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return -ENOSYS; -} - -static void c2_add_ref(struct ib_qp *ibqp) -{ - struct c2_qp *qp; - BUG_ON(!ibqp); - qp = to_c2qp(ibqp); - atomic_inc(&qp->refcount); -} - -static void c2_rem_ref(struct ib_qp *ibqp) -{ - struct c2_qp *qp; - BUG_ON(!ibqp); - qp = to_c2qp(ibqp); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -} - -struct ib_qp *c2_get_qp(struct ib_device *device, int qpn) -{ - struct c2_dev* c2dev = to_c2dev(device); - struct c2_qp *qp; - - qp = c2_find_qpn(c2dev, qpn); - pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n", - __func__, qp, qpn, device, - (qp?atomic_read(&qp->refcount):0)); - - return (qp?&qp->ibqp:NULL); -} - -static struct ib_qp *c2_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) -{ - struct c2_qp *qp; - int err; - - pr_debug("%s:%u\n", __func__, __LINE__); - - if (init_attr->create_flags) - return ERR_PTR(-EINVAL); - - switch (init_attr->qp_type) { - case IB_QPT_RC: - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) { - pr_debug("%s: Unable to allocate QP\n", __func__); - return ERR_PTR(-ENOMEM); - } - spin_lock_init(&qp->lock); - if (pd->uobject) { - /* userspace specific */ - } - - err = c2_alloc_qp(to_c2dev(pd->device), - to_c2pd(pd), init_attr, qp); - - if (err && pd->uobject) { - /* userspace specific */ - } - - break; - default: - pr_debug("%s: Invalid QP type: %d\n", __func__, - init_attr->qp_type); - return ERR_PTR(-EINVAL); - } - - if (err) { - kfree(qp); - return ERR_PTR(err); - } - - return &qp->ibqp; -} - -static int c2_destroy_qp(struct ib_qp *ib_qp) -{ - struct c2_qp *qp = to_c2qp(ib_qp); - - pr_debug("%s:%u qp=%p,qp->state=%d\n", - __func__, __LINE__, ib_qp, qp->state); - c2_free_qp(to_c2dev(ib_qp->device), qp); - kfree(qp); - return 0; -} - -static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct c2_cq *cq; - int err; - - cq = kmalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - pr_debug("%s: Unable to allocate CQ\n", __func__); - return ERR_PTR(-ENOMEM); - } - - err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq); - if (err) { - pr_debug("%s: error initializing CQ\n", __func__); - kfree(cq); - return ERR_PTR(err); - } - - return &cq->ibcq; -} - -static int c2_destroy_cq(struct ib_cq *ib_cq) -{ - struct c2_cq *cq = to_c2cq(ib_cq); - - pr_debug("%s:%u\n", __func__, __LINE__); - - c2_free_cq(to_c2dev(ib_cq->device), cq); - kfree(cq); - - return 0; -} - -static inline u32 c2_convert_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) | - C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND; -} - -static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 * iova_start) -{ - struct c2_mr *mr; - u64 *page_list; - u32 total_len; - int err, i, j, k, page_shift, pbl_depth; - - pbl_depth = 0; - total_len = 0; - - page_shift = PAGE_SHIFT; - /* - * If there is only 1 buffer we assume this could - * be a map of all phy mem...use a 32k page_shift. - */ - if (num_phys_buf == 1) - page_shift += 3; - - for (i = 0; i < num_phys_buf; i++) { - - if (buffer_list[i].addr & ~PAGE_MASK) { - pr_debug("Unaligned Memory Buffer: 0x%x\n", - (unsigned int) buffer_list[i].addr); - return ERR_PTR(-EINVAL); - } - - if (!buffer_list[i].size) { - pr_debug("Invalid Buffer Size\n"); - return ERR_PTR(-EINVAL); - } - - total_len += buffer_list[i].size; - pbl_depth += ALIGN(buffer_list[i].size, - (1 << page_shift)) >> page_shift; - } - - page_list = vmalloc(sizeof(u64) * pbl_depth); - if (!page_list) { - pr_debug("couldn't vmalloc page_list of size %zd\n", - (sizeof(u64) * pbl_depth)); - return ERR_PTR(-ENOMEM); - } - - for (i = 0, j = 0; i < num_phys_buf; i++) { - - int naddrs; - - naddrs = ALIGN(buffer_list[i].size, - (1 << page_shift)) >> page_shift; - for (k = 0; k < naddrs; k++) - page_list[j++] = (buffer_list[i].addr + - (k << page_shift)); - } - - mr = kmalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - vfree(page_list); - return ERR_PTR(-ENOMEM); - } - - mr->pd = to_c2pd(ib_pd); - mr->umem = NULL; - pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " - "*iova_start %llx, first pa %llx, last pa %llx\n", - __func__, page_shift, pbl_depth, total_len, - (unsigned long long) *iova_start, - (unsigned long long) page_list[0], - (unsigned long long) page_list[pbl_depth-1]); - err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, - (1 << page_shift), pbl_depth, - total_len, 0, iova_start, - c2_convert_access(acc), mr); - vfree(page_list); - if (err) { - kfree(mr); - return ERR_PTR(err); - } - - return &mr->ibmr; -} - -static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc) -{ - struct ib_phys_buf bl; - u64 kva = 0; - - pr_debug("%s:%u\n", __func__, __LINE__); - - /* AMSO1100 limit */ - bl.size = 0xffffffff; - bl.addr = 0; - return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); -} - -static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - u64 *pages; - u64 kva = 0; - int shift, n, len; - int i, k, entry; - int err = 0; - struct scatterlist *sg; - struct c2_pd *c2pd = to_c2pd(pd); - struct c2_mr *c2mr; - - pr_debug("%s:%u\n", __func__, __LINE__); - - c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); - if (!c2mr) - return ERR_PTR(-ENOMEM); - c2mr->pd = c2pd; - - c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); - if (IS_ERR(c2mr->umem)) { - err = PTR_ERR(c2mr->umem); - kfree(c2mr); - return ERR_PTR(err); - } - - shift = ffs(c2mr->umem->page_size) - 1; - n = c2mr->umem->nmap; - - pages = kmalloc(n * sizeof(u64), GFP_KERNEL); - if (!pages) { - err = -ENOMEM; - goto err; - } - - i = 0; - for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) { - len = sg_dma_len(sg) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = - sg_dma_address(sg) + - (c2mr->umem->page_size * k); - } - } - - kva = virt; - err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), - pages, - c2mr->umem->page_size, - i, - length, - ib_umem_offset(c2mr->umem), - &kva, - c2_convert_access(acc), - c2mr); - kfree(pages); - if (err) - goto err; - return &c2mr->ibmr; - -err: - ib_umem_release(c2mr->umem); - kfree(c2mr); - return ERR_PTR(err); -} - -static int c2_dereg_mr(struct ib_mr *ib_mr) -{ - struct c2_mr *mr = to_c2mr(ib_mr); - int err; - - pr_debug("%s:%u\n", __func__, __LINE__); - - err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); - if (err) - pr_debug("c2_stag_dealloc failed: %d\n", err); - else { - if (mr->umem) - ib_umem_release(mr->umem); - kfree(mr); - } - - return err; -} - -static ssize_t show_rev(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev); - pr_debug("%s:%u\n", __func__, __LINE__); - return sprintf(buf, "%x\n", c2dev->props.hw_ver); -} - -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev); - pr_debug("%s:%u\n", __func__, __LINE__); - return sprintf(buf, "%x.%x.%x\n", - (int) (c2dev->props.fw_ver >> 32), - (int) (c2dev->props.fw_ver >> 16) & 0xffff, - (int) (c2dev->props.fw_ver & 0xffff)); -} - -static ssize_t show_hca(struct device *dev, struct device_attribute *attr, - char *buf) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return sprintf(buf, "AMSO1100\n"); -} - -static ssize_t show_board(struct device *dev, struct device_attribute *attr, - char *buf) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID"); -} - -static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); -static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); -static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); - -static struct device_attribute *c2_dev_attributes[] = { - &dev_attr_hw_rev, - &dev_attr_fw_ver, - &dev_attr_hca_type, - &dev_attr_board_id -}; - -static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - int err; - - err = - c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr, - attr_mask); - - return err; -} - -static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return -ENOSYS; -} - -static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return -ENOSYS; -} - -static int c2_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - return -ENOSYS; -} - -static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - - /* Request a connection */ - return c2_llp_connect(cm_id, iw_param); -} - -static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - - /* Accept the new connection */ - return c2_llp_accept(cm_id, iw_param); -} - -static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - int err; - - pr_debug("%s:%u\n", __func__, __LINE__); - - err = c2_llp_reject(cm_id, pdata, pdata_len); - return err; -} - -static int c2_service_create(struct iw_cm_id *cm_id, int backlog) -{ - int err; - - pr_debug("%s:%u\n", __func__, __LINE__); - err = c2_llp_service_create(cm_id, backlog); - pr_debug("%s:%u err=%d\n", - __func__, __LINE__, - err); - return err; -} - -static int c2_service_destroy(struct iw_cm_id *cm_id) -{ - int err; - pr_debug("%s:%u\n", __func__, __LINE__); - - err = c2_llp_service_destroy(cm_id); - - return err; -} - -static int c2_pseudo_up(struct net_device *netdev) -{ - struct in_device *ind; - struct c2_dev *c2dev = netdev->ml_priv; - - ind = in_dev_get(netdev); - if (!ind) - return 0; - - pr_debug("adding...\n"); - for_ifa(ind) { -#ifdef DEBUG - u8 *ip = (u8 *) & ifa->ifa_address; - - pr_debug("%s: %d.%d.%d.%d\n", - ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); -#endif - c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); - } - endfor_ifa(ind); - in_dev_put(ind); - - return 0; -} - -static int c2_pseudo_down(struct net_device *netdev) -{ - struct in_device *ind; - struct c2_dev *c2dev = netdev->ml_priv; - - ind = in_dev_get(netdev); - if (!ind) - return 0; - - pr_debug("deleting...\n"); - for_ifa(ind) { -#ifdef DEBUG - u8 *ip = (u8 *) & ifa->ifa_address; - - pr_debug("%s: %d.%d.%d.%d\n", - ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); -#endif - c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); - } - endfor_ifa(ind); - in_dev_put(ind); - - return 0; -} - -static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev) -{ - kfree_skb(skb); - return NETDEV_TX_OK; -} - -static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu) -{ - if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) - return -EINVAL; - - netdev->mtu = new_mtu; - - /* TODO: Tell rnic about new rmda interface mtu */ - return 0; -} - -static const struct net_device_ops c2_pseudo_netdev_ops = { - .ndo_open = c2_pseudo_up, - .ndo_stop = c2_pseudo_down, - .ndo_start_xmit = c2_pseudo_xmit_frame, - .ndo_change_mtu = c2_pseudo_change_mtu, - .ndo_validate_addr = eth_validate_addr, -}; - -static void setup(struct net_device *netdev) -{ - netdev->netdev_ops = &c2_pseudo_netdev_ops; - - netdev->watchdog_timeo = 0; - netdev->type = ARPHRD_ETHER; - netdev->mtu = 1500; - netdev->hard_header_len = ETH_HLEN; - netdev->addr_len = ETH_ALEN; - netdev->tx_queue_len = 0; - netdev->flags |= IFF_NOARP; -} - -static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) -{ - char name[IFNAMSIZ]; - struct net_device *netdev; - - /* change ethxxx to iwxxx */ - strcpy(name, "iw"); - strcat(name, &c2dev->netdev->name[3]); - netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup); - if (!netdev) { - printk(KERN_ERR PFX "%s - etherdev alloc failed", - __func__); - return NULL; - } - - netdev->ml_priv = c2dev; - - SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); - - memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); - - /* Print out the MAC address */ - pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr); - -#if 0 - /* Disable network packets */ - netif_stop_queue(netdev); -#endif - return netdev; -} - -int c2_register_device(struct c2_dev *dev) -{ - int ret = -ENOMEM; - int i; - - /* Register pseudo network device */ - dev->pseudo_netdev = c2_pseudo_netdev_init(dev); - if (!dev->pseudo_netdev) - goto out; - - ret = register_netdev(dev->pseudo_netdev); - if (ret) - goto out_free_netdev; - - pr_debug("%s:%u\n", __func__, __LINE__); - strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX); - dev->ibdev.owner = THIS_MODULE; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); - - dev->ibdev.node_type = RDMA_NODE_RNIC; - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); - dev->ibdev.phys_port_cnt = 1; - dev->ibdev.num_comp_vectors = 1; - dev->ibdev.dma_device = &dev->pcidev->dev; - dev->ibdev.query_device = c2_query_device; - dev->ibdev.query_port = c2_query_port; - dev->ibdev.query_pkey = c2_query_pkey; - dev->ibdev.query_gid = c2_query_gid; - dev->ibdev.alloc_ucontext = c2_alloc_ucontext; - dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext; - dev->ibdev.mmap = c2_mmap_uar; - dev->ibdev.alloc_pd = c2_alloc_pd; - dev->ibdev.dealloc_pd = c2_dealloc_pd; - dev->ibdev.create_ah = c2_ah_create; - dev->ibdev.destroy_ah = c2_ah_destroy; - dev->ibdev.create_qp = c2_create_qp; - dev->ibdev.modify_qp = c2_modify_qp; - dev->ibdev.destroy_qp = c2_destroy_qp; - dev->ibdev.create_cq = c2_create_cq; - dev->ibdev.destroy_cq = c2_destroy_cq; - dev->ibdev.poll_cq = c2_poll_cq; - dev->ibdev.get_dma_mr = c2_get_dma_mr; - dev->ibdev.reg_phys_mr = c2_reg_phys_mr; - dev->ibdev.reg_user_mr = c2_reg_user_mr; - dev->ibdev.dereg_mr = c2_dereg_mr; - - dev->ibdev.alloc_fmr = NULL; - dev->ibdev.unmap_fmr = NULL; - dev->ibdev.dealloc_fmr = NULL; - dev->ibdev.map_phys_fmr = NULL; - - dev->ibdev.attach_mcast = c2_multicast_attach; - dev->ibdev.detach_mcast = c2_multicast_detach; - dev->ibdev.process_mad = c2_process_mad; - - dev->ibdev.req_notify_cq = c2_arm_cq; - dev->ibdev.post_send = c2_post_send; - dev->ibdev.post_recv = c2_post_receive; - - dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); - if (dev->ibdev.iwcm == NULL) { - ret = -ENOMEM; - goto out_unregister_netdev; - } - dev->ibdev.iwcm->add_ref = c2_add_ref; - dev->ibdev.iwcm->rem_ref = c2_rem_ref; - dev->ibdev.iwcm->get_qp = c2_get_qp; - dev->ibdev.iwcm->connect = c2_connect; - dev->ibdev.iwcm->accept = c2_accept; - dev->ibdev.iwcm->reject = c2_reject; - dev->ibdev.iwcm->create_listen = c2_service_create; - dev->ibdev.iwcm->destroy_listen = c2_service_destroy; - - ret = ib_register_device(&dev->ibdev, NULL); - if (ret) - goto out_free_iwcm; - - for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) { - ret = device_create_file(&dev->ibdev.dev, - c2_dev_attributes[i]); - if (ret) - goto out_unregister_ibdev; - } - goto out; - -out_unregister_ibdev: - ib_unregister_device(&dev->ibdev); -out_free_iwcm: - kfree(dev->ibdev.iwcm); -out_unregister_netdev: - unregister_netdev(dev->pseudo_netdev); -out_free_netdev: - free_netdev(dev->pseudo_netdev); -out: - pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret); - return ret; -} - -void c2_unregister_device(struct c2_dev *dev) -{ - pr_debug("%s:%u\n", __func__, __LINE__); - unregister_netdev(dev->pseudo_netdev); - free_netdev(dev->pseudo_netdev); - ib_unregister_device(&dev->ibdev); -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_provider.h b/kernel/drivers/infiniband/hw/amso1100/c2_provider.h deleted file mode 100644 index bf1899877..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_provider.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef C2_PROVIDER_H -#define C2_PROVIDER_H -#include <linux/inetdevice.h> - -#include <rdma/ib_verbs.h> -#include <rdma/ib_pack.h> - -#include "c2_mq.h" -#include <rdma/iw_cm.h> - -#define C2_MPT_FLAG_ATOMIC (1 << 14) -#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13) -#define C2_MPT_FLAG_REMOTE_READ (1 << 12) -#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11) -#define C2_MPT_FLAG_LOCAL_READ (1 << 10) - -struct c2_buf_list { - void *buf; - DEFINE_DMA_UNMAP_ADDR(mapping); -}; - - -/* The user context keeps track of objects allocated for a - * particular user-mode client. */ -struct c2_ucontext { - struct ib_ucontext ibucontext; -}; - -struct c2_mtt; - -/* All objects associated with a PD are kept in the - * associated user context if present. - */ -struct c2_pd { - struct ib_pd ibpd; - u32 pd_id; -}; - -struct c2_mr { - struct ib_mr ibmr; - struct c2_pd *pd; - struct ib_umem *umem; -}; - -struct c2_av; - -enum c2_ah_type { - C2_AH_ON_HCA, - C2_AH_PCI_POOL, - C2_AH_KMALLOC -}; - -struct c2_ah { - struct ib_ah ibah; -}; - -struct c2_cq { - struct ib_cq ibcq; - spinlock_t lock; - atomic_t refcount; - int cqn; - int is_kernel; - wait_queue_head_t wait; - - u32 adapter_handle; - struct c2_mq mq; -}; - -struct c2_wq { - spinlock_t lock; -}; -struct iw_cm_id; -struct c2_qp { - struct ib_qp ibqp; - struct iw_cm_id *cm_id; - spinlock_t lock; - atomic_t refcount; - wait_queue_head_t wait; - int qpn; - - u32 adapter_handle; - u32 send_sgl_depth; - u32 recv_sgl_depth; - u32 rdma_write_sgl_depth; - u8 state; - - struct c2_mq sq_mq; - struct c2_mq rq_mq; -}; - -struct c2_cr_query_attrs { - u32 local_addr; - u32 remote_addr; - u16 local_port; - u16 remote_port; -}; - -static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct c2_pd, ibpd); -} - -static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext) -{ - return container_of(ibucontext, struct c2_ucontext, ibucontext); -} - -static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct c2_mr, ibmr); -} - - -static inline struct c2_ah *to_c2ah(struct ib_ah *ibah) -{ - return container_of(ibah, struct c2_ah, ibah); -} - -static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct c2_cq, ibcq); -} - -static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct c2_qp, ibqp); -} - -static inline int is_rnic_addr(struct net_device *netdev, u32 addr) -{ - struct in_device *ind; - int ret = 0; - - ind = in_dev_get(netdev); - if (!ind) - return 0; - - for_ifa(ind) { - if (ifa->ifa_address == addr) { - ret = 1; - break; - } - } - endfor_ifa(ind); - in_dev_put(ind); - return ret; -} -#endif /* C2_PROVIDER_H */ diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_qp.c b/kernel/drivers/infiniband/hw/amso1100/c2_qp.c deleted file mode 100644 index 86708dee5..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_qp.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2004 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/delay.h> -#include <linux/gfp.h> - -#include "c2.h" -#include "c2_vq.h" -#include "c2_status.h" - -#define C2_MAX_ORD_PER_QP 128 -#define C2_MAX_IRD_PER_QP 128 - -#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) -#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) -#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) - -#define NO_SUPPORT -1 -static const u8 c2_opcode[] = { - [IB_WR_SEND] = C2_WR_TYPE_SEND, - [IB_WR_SEND_WITH_IMM] = NO_SUPPORT, - [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT, - [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT, - [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT, -}; - -static int to_c2_state(enum ib_qp_state ib_state) -{ - switch (ib_state) { - case IB_QPS_RESET: - return C2_QP_STATE_IDLE; - case IB_QPS_RTS: - return C2_QP_STATE_RTS; - case IB_QPS_SQD: - return C2_QP_STATE_CLOSING; - case IB_QPS_SQE: - return C2_QP_STATE_CLOSING; - case IB_QPS_ERR: - return C2_QP_STATE_ERROR; - default: - return -1; - } -} - -static int to_ib_state(enum c2_qp_state c2_state) -{ - switch (c2_state) { - case C2_QP_STATE_IDLE: - return IB_QPS_RESET; - case C2_QP_STATE_CONNECTING: - return IB_QPS_RTR; - case C2_QP_STATE_RTS: - return IB_QPS_RTS; - case C2_QP_STATE_CLOSING: - return IB_QPS_SQD; - case C2_QP_STATE_ERROR: - return IB_QPS_ERR; - case C2_QP_STATE_TERMINATE: - return IB_QPS_SQE; - default: - return -1; - } -} - -static const char *to_ib_state_str(int ib_state) -{ - static const char *state_str[] = { - "IB_QPS_RESET", - "IB_QPS_INIT", - "IB_QPS_RTR", - "IB_QPS_RTS", - "IB_QPS_SQD", - "IB_QPS_SQE", - "IB_QPS_ERR" - }; - if (ib_state < IB_QPS_RESET || - ib_state > IB_QPS_ERR) - return "<invalid IB QP state>"; - - ib_state -= IB_QPS_RESET; - return state_str[ib_state]; -} - -void c2_set_qp_state(struct c2_qp *qp, int c2_state) -{ - int new_state = to_ib_state(c2_state); - - pr_debug("%s: qp[%p] state modify %s --> %s\n", - __func__, - qp, - to_ib_state_str(qp->state), - to_ib_state_str(new_state)); - qp->state = new_state; -} - -#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF - -int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, - struct ib_qp_attr *attr, int attr_mask) -{ - struct c2wr_qp_modify_req wr; - struct c2wr_qp_modify_rep *reply; - struct c2_vq_req *vq_req; - unsigned long flags; - u8 next_state; - int err; - - pr_debug("%s:%d qp=%p, %s --> %s\n", - __func__, __LINE__, - qp, - to_ib_state_str(qp->state), - to_ib_state_str(attr->qp_state)); - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - c2_wr_set_id(&wr, CCWR_QP_MODIFY); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.qp_handle = qp->adapter_handle; - wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - - if (attr_mask & IB_QP_STATE) { - /* Ensure the state is valid */ - if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) { - err = -EINVAL; - goto bail0; - } - - wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state)); - - if (attr->qp_state == IB_QPS_ERR) { - spin_lock_irqsave(&qp->lock, flags); - if (qp->cm_id && qp->state == IB_QPS_RTS) { - pr_debug("Generating CLOSE event for QP-->ERR, " - "qp=%p, cm_id=%p\n",qp,qp->cm_id); - /* Generate an CLOSE event */ - vq_req->cm_id = qp->cm_id; - vq_req->event = IW_CM_EVENT_CLOSE; - } - spin_unlock_irqrestore(&qp->lock, flags); - } - next_state = attr->qp_state; - - } else if (attr_mask & IB_QP_CUR_STATE) { - - if (attr->cur_qp_state != IB_QPS_RTR && - attr->cur_qp_state != IB_QPS_RTS && - attr->cur_qp_state != IB_QPS_SQD && - attr->cur_qp_state != IB_QPS_SQE) { - err = -EINVAL; - goto bail0; - } else - wr.next_qp_state = - cpu_to_be32(to_c2_state(attr->cur_qp_state)); - - next_state = attr->cur_qp_state; - - } else { - err = 0; - goto bail0; - } - - /* reference the request struct */ - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail0; - - reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - err = c2_errno(reply); - if (!err) - qp->state = next_state; -#ifdef DEBUG - else - pr_debug("%s: c2_errno=%d\n", __func__, err); -#endif - /* - * If we're going to error and generating the event here, then - * we need to remove the reference because there will be no - * close event generated by the adapter - */ - spin_lock_irqsave(&qp->lock, flags); - if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) { - qp->cm_id->rem_ref(qp->cm_id); - qp->cm_id = NULL; - } - spin_unlock_irqrestore(&qp->lock, flags); - - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - - pr_debug("%s:%d qp=%p, cur_state=%s\n", - __func__, __LINE__, - qp, - to_ib_state_str(qp->state)); - return err; -} - -int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, - int ord, int ird) -{ - struct c2wr_qp_modify_req wr; - struct c2wr_qp_modify_rep *reply; - struct c2_vq_req *vq_req; - int err; - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - c2_wr_set_id(&wr, CCWR_QP_MODIFY); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.qp_handle = qp->adapter_handle; - wr.ord = cpu_to_be32(ord); - wr.ird = cpu_to_be32(ird); - wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); - - /* reference the request struct */ - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail0; - - reply = (struct c2wr_qp_modify_rep *) (unsigned long) - vq_req->reply_msg; - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - err = c2_errno(reply); - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp) -{ - struct c2_vq_req *vq_req; - struct c2wr_qp_destroy_req wr; - struct c2wr_qp_destroy_rep *reply; - unsigned long flags; - int err; - - /* - * Allocate a verb request message - */ - vq_req = vq_req_alloc(c2dev); - if (!vq_req) { - return -ENOMEM; - } - - /* - * Initialize the WR - */ - c2_wr_set_id(&wr, CCWR_QP_DESTROY); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.qp_handle = qp->adapter_handle; - - /* - * reference the request struct. dereferenced in the int handler. - */ - vq_req_get(c2dev, vq_req); - - spin_lock_irqsave(&qp->lock, flags); - if (qp->cm_id && qp->state == IB_QPS_RTS) { - pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, " - "qp=%p, cm_id=%p\n",qp,qp->cm_id); - /* Generate an CLOSE event */ - vq_req->qp = qp; - vq_req->cm_id = qp->cm_id; - vq_req->event = IW_CM_EVENT_CLOSE; - } - spin_unlock_irqrestore(&qp->lock, flags); - - /* - * Send WR to adapter - */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - /* - * Wait for reply from adapter - */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail0; - } - - /* - * Process reply - */ - reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - spin_lock_irqsave(&qp->lock, flags); - if (qp->cm_id) { - qp->cm_id->rem_ref(qp->cm_id); - qp->cm_id = NULL; - } - spin_unlock_irqrestore(&qp->lock, flags); - - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock_irq(&c2dev->qp_table.lock); - - ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT); - if (ret >= 0) - qp->qpn = ret; - - spin_unlock_irq(&c2dev->qp_table.lock); - idr_preload_end(); - return ret < 0 ? ret : 0; -} - -static void c2_free_qpn(struct c2_dev *c2dev, int qpn) -{ - spin_lock_irq(&c2dev->qp_table.lock); - idr_remove(&c2dev->qp_table.idr, qpn); - spin_unlock_irq(&c2dev->qp_table.lock); -} - -struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn) -{ - unsigned long flags; - struct c2_qp *qp; - - spin_lock_irqsave(&c2dev->qp_table.lock, flags); - qp = idr_find(&c2dev->qp_table.idr, qpn); - spin_unlock_irqrestore(&c2dev->qp_table.lock, flags); - return qp; -} - -int c2_alloc_qp(struct c2_dev *c2dev, - struct c2_pd *pd, - struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp) -{ - struct c2wr_qp_create_req wr; - struct c2wr_qp_create_rep *reply; - struct c2_vq_req *vq_req; - struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq); - struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq); - unsigned long peer_pa; - u32 q_size, msg_size, mmap_size; - void __iomem *mmap; - int err; - - err = c2_alloc_qpn(c2dev, qp); - if (err) - return err; - qp->ibqp.qp_num = qp->qpn; - qp->ibqp.qp_type = IB_QPT_RC; - - /* Allocate the SQ and RQ shared pointers */ - qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &qp->sq_mq.shared_dma, GFP_KERNEL); - if (!qp->sq_mq.shared) { - err = -ENOMEM; - goto bail0; - } - - qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &qp->rq_mq.shared_dma, GFP_KERNEL); - if (!qp->rq_mq.shared) { - err = -ENOMEM; - goto bail1; - } - - /* Allocate the verbs request */ - vq_req = vq_req_alloc(c2dev); - if (vq_req == NULL) { - err = -ENOMEM; - goto bail2; - } - - /* Initialize the work request */ - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_QP_CREATE); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - wr.sq_cq_handle = send_cq->adapter_handle; - wr.rq_cq_handle = recv_cq->adapter_handle; - wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1); - wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1); - wr.srq_handle = 0; - wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND | - QP_ZERO_STAG | QP_RDMA_READ_RESPONSE); - wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); - wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge); - wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); - wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma); - wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma); - wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP); - wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP); - wr.pd_id = pd->pd_id; - wr.user_context = (unsigned long) qp; - - vq_req_get(c2dev, vq_req); - - /* Send the WR to the adapter */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail3; - } - - /* Wait for the verb reply */ - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail3; - } - - /* Process the reply */ - reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail3; - } - - if ((err = c2_wr_get_result(reply)) != 0) { - goto bail4; - } - - /* Fill in the kernel QP struct */ - atomic_set(&qp->refcount, 1); - qp->adapter_handle = reply->qp_handle; - qp->state = IB_QPS_RESET; - qp->send_sgl_depth = qp_attrs->cap.max_send_sge; - qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge; - qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge; - init_waitqueue_head(&qp->wait); - - /* Initialize the SQ MQ */ - q_size = be32_to_cpu(reply->sq_depth); - msg_size = be32_to_cpu(reply->sq_msg_size); - peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start); - mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); - mmap = ioremap_nocache(peer_pa, mmap_size); - if (!mmap) { - err = -ENOMEM; - goto bail5; - } - - c2_mq_req_init(&qp->sq_mq, - be32_to_cpu(reply->sq_mq_index), - q_size, - msg_size, - mmap + sizeof(struct c2_mq_shared), /* pool start */ - mmap, /* peer */ - C2_MQ_ADAPTER_TARGET); - - /* Initialize the RQ mq */ - q_size = be32_to_cpu(reply->rq_depth); - msg_size = be32_to_cpu(reply->rq_msg_size); - peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start); - mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); - mmap = ioremap_nocache(peer_pa, mmap_size); - if (!mmap) { - err = -ENOMEM; - goto bail6; - } - - c2_mq_req_init(&qp->rq_mq, - be32_to_cpu(reply->rq_mq_index), - q_size, - msg_size, - mmap + sizeof(struct c2_mq_shared), /* pool start */ - mmap, /* peer */ - C2_MQ_ADAPTER_TARGET); - - vq_repbuf_free(c2dev, reply); - vq_req_free(c2dev, vq_req); - - return 0; - - bail6: - iounmap(qp->sq_mq.peer); - bail5: - destroy_qp(c2dev, qp); - bail4: - vq_repbuf_free(c2dev, reply); - bail3: - vq_req_free(c2dev, vq_req); - bail2: - c2_free_mqsp(qp->rq_mq.shared); - bail1: - c2_free_mqsp(qp->sq_mq.shared); - bail0: - c2_free_qpn(c2dev, qp->qpn); - return err; -} - -static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq) -{ - if (send_cq == recv_cq) - spin_lock_irq(&send_cq->lock); - else if (send_cq > recv_cq) { - spin_lock_irq(&send_cq->lock); - spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); - } else { - spin_lock_irq(&recv_cq->lock); - spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); - } -} - -static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq) -{ - if (send_cq == recv_cq) - spin_unlock_irq(&send_cq->lock); - else if (send_cq > recv_cq) { - spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); - } else { - spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); - } -} - -void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) -{ - struct c2_cq *send_cq; - struct c2_cq *recv_cq; - - send_cq = to_c2cq(qp->ibqp.send_cq); - recv_cq = to_c2cq(qp->ibqp.recv_cq); - - /* - * Lock CQs here, so that CQ polling code can do QP lookup - * without taking a lock. - */ - c2_lock_cqs(send_cq, recv_cq); - c2_free_qpn(c2dev, qp->qpn); - c2_unlock_cqs(send_cq, recv_cq); - - /* - * Destroy qp in the rnic... - */ - destroy_qp(c2dev, qp); - - /* - * Mark any unreaped CQEs as null and void. - */ - c2_cq_clean(c2dev, qp, send_cq->cqn); - if (send_cq != recv_cq) - c2_cq_clean(c2dev, qp, recv_cq->cqn); - /* - * Unmap the MQs and return the shared pointers - * to the message pool. - */ - iounmap(qp->sq_mq.peer); - iounmap(qp->rq_mq.peer); - c2_free_mqsp(qp->sq_mq.shared); - c2_free_mqsp(qp->rq_mq.shared); - - atomic_dec(&qp->refcount); - wait_event(qp->wait, !atomic_read(&qp->refcount)); -} - -/* - * Function: move_sgl - * - * Description: - * Move an SGL from the user's work request struct into a CCIL Work Request - * message, swapping to WR byte order and ensure the total length doesn't - * overflow. - * - * IN: - * dst - ptr to CCIL Work Request message SGL memory. - * src - ptr to the consumers SGL memory. - * - * OUT: none - * - * Return: - * CCIL status codes. - */ -static int -move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len, - u8 * actual_count) -{ - u32 tot = 0; /* running total */ - u8 acount = 0; /* running total non-0 len sge's */ - - while (count > 0) { - /* - * If the addition of this SGE causes the - * total SGL length to exceed 2^32-1, then - * fail-n-bail. - * - * If the current total plus the next element length - * wraps, then it will go negative and be less than the - * current total... - */ - if ((tot + src->length) < tot) { - return -EINVAL; - } - /* - * Bug: 1456 (as well as 1498 & 1643) - * Skip over any sge's supplied with len=0 - */ - if (src->length) { - tot += src->length; - dst->stag = cpu_to_be32(src->lkey); - dst->to = cpu_to_be64(src->addr); - dst->length = cpu_to_be32(src->length); - dst++; - acount++; - } - src++; - count--; - } - - if (acount == 0) { - /* - * Bug: 1476 (as well as 1498, 1456 and 1643) - * Setup the SGL in the WR to make it easier for the RNIC. - * This way, the FW doesn't have to deal with special cases. - * Setting length=0 should be sufficient. - */ - dst->stag = 0; - dst->to = 0; - dst->length = 0; - } - - *p_len = tot; - *actual_count = acount; - return 0; -} - -/* - * Function: c2_activity (private function) - * - * Description: - * Post an mq index to the host->adapter activity fifo. - * - * IN: - * c2dev - ptr to c2dev structure - * mq_index - mq index to post - * shared - value most recently written to shared - * - * OUT: - * - * Return: - * none - */ -static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared) -{ - /* - * First read the register to see if the FIFO is full, and if so, - * spin until it's not. This isn't perfect -- there is no - * synchronization among the clients of the register, but in - * practice it prevents multiple CPU from hammering the bus - * with PCI RETRY. Note that when this does happen, the card - * cannot get on the bus and the card and system hang in a - * deadlock -- thus the need for this code. [TOT] - */ - while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) - udelay(10); - - __raw_writel(C2_HINT_MAKE(mq_index, shared), - c2dev->regs + PCI_BAR0_ADAPTER_HINT); -} - -/* - * Function: qp_wr_post - * - * Description: - * This in-line function allocates a MQ msg, then moves the host-copy of - * the completed WR into msg. Then it posts the message. - * - * IN: - * q - ptr to user MQ. - * wr - ptr to host-copy of the WR. - * qp - ptr to user qp - * size - Number of bytes to post. Assumed to be divisible by 4. - * - * OUT: none - * - * Return: - * CCIL status codes. - */ -static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size) -{ - union c2wr *msg; - - msg = c2_mq_alloc(q); - if (msg == NULL) { - return -EINVAL; - } -#ifdef CCMSGMAGIC - ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC); -#endif - - /* - * Since all header fields in the WR are the same as the - * CQE, set the following so the adapter need not. - */ - c2_wr_set_result(wr, CCERR_PENDING); - - /* - * Copy the wr down to the adapter - */ - memcpy((void *) msg, (void *) wr, size); - - c2_mq_produce(q); - return 0; -} - - -int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) -{ - struct c2_dev *c2dev = to_c2dev(ibqp->device); - struct c2_qp *qp = to_c2qp(ibqp); - union c2wr wr; - unsigned long lock_flags; - int err = 0; - - u32 flags; - u32 tot_len; - u8 actual_sge_count; - u32 msg_size; - - if (qp->state > IB_QPS_RTS) { - err = -EINVAL; - goto out; - } - - while (ib_wr) { - - flags = 0; - wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id; - if (ib_wr->send_flags & IB_SEND_SIGNALED) { - flags |= SQ_SIGNALED; - } - - switch (ib_wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (ib_wr->opcode == IB_WR_SEND) { - if (ib_wr->send_flags & IB_SEND_SOLICITED) - c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE); - else - c2_wr_set_id(&wr, C2_WR_TYPE_SEND); - wr.sqwr.send.remote_stag = 0; - } else { - if (ib_wr->send_flags & IB_SEND_SOLICITED) - c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV); - else - c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV); - wr.sqwr.send.remote_stag = - cpu_to_be32(ib_wr->ex.invalidate_rkey); - } - - msg_size = sizeof(struct c2wr_send_req) + - sizeof(struct c2_data_addr) * ib_wr->num_sge; - if (ib_wr->num_sge > qp->send_sgl_depth) { - err = -EINVAL; - break; - } - if (ib_wr->send_flags & IB_SEND_FENCE) { - flags |= SQ_READ_FENCE; - } - err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data), - ib_wr->sg_list, - ib_wr->num_sge, - &tot_len, &actual_sge_count); - wr.sqwr.send.sge_len = cpu_to_be32(tot_len); - c2_wr_set_sge_count(&wr, actual_sge_count); - break; - case IB_WR_RDMA_WRITE: - c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE); - msg_size = sizeof(struct c2wr_rdma_write_req) + - (sizeof(struct c2_data_addr) * ib_wr->num_sge); - if (ib_wr->num_sge > qp->rdma_write_sgl_depth) { - err = -EINVAL; - break; - } - if (ib_wr->send_flags & IB_SEND_FENCE) { - flags |= SQ_READ_FENCE; - } - wr.sqwr.rdma_write.remote_stag = - cpu_to_be32(ib_wr->wr.rdma.rkey); - wr.sqwr.rdma_write.remote_to = - cpu_to_be64(ib_wr->wr.rdma.remote_addr); - err = move_sgl((struct c2_data_addr *) - & (wr.sqwr.rdma_write.data), - ib_wr->sg_list, - ib_wr->num_sge, - &tot_len, &actual_sge_count); - wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len); - c2_wr_set_sge_count(&wr, actual_sge_count); - break; - case IB_WR_RDMA_READ: - c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ); - msg_size = sizeof(struct c2wr_rdma_read_req); - - /* IWarp only suppots 1 sge for RDMA reads */ - if (ib_wr->num_sge > 1) { - err = -EINVAL; - break; - } - - /* - * Move the local and remote stag/to/len into the WR. - */ - wr.sqwr.rdma_read.local_stag = - cpu_to_be32(ib_wr->sg_list->lkey); - wr.sqwr.rdma_read.local_to = - cpu_to_be64(ib_wr->sg_list->addr); - wr.sqwr.rdma_read.remote_stag = - cpu_to_be32(ib_wr->wr.rdma.rkey); - wr.sqwr.rdma_read.remote_to = - cpu_to_be64(ib_wr->wr.rdma.remote_addr); - wr.sqwr.rdma_read.length = - cpu_to_be32(ib_wr->sg_list->length); - break; - default: - /* error */ - msg_size = 0; - err = -EINVAL; - break; - } - - /* - * If we had an error on the last wr build, then - * break out. Possible errors include bogus WR - * type, and a bogus SGL length... - */ - if (err) { - break; - } - - /* - * Store flags - */ - c2_wr_set_flags(&wr, flags); - - /* - * Post the puppy! - */ - spin_lock_irqsave(&qp->lock, lock_flags); - err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size); - if (err) { - spin_unlock_irqrestore(&qp->lock, lock_flags); - break; - } - - /* - * Enqueue mq index to activity FIFO. - */ - c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count); - spin_unlock_irqrestore(&qp->lock, lock_flags); - - ib_wr = ib_wr->next; - } - -out: - if (err) - *bad_wr = ib_wr; - return err; -} - -int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) -{ - struct c2_dev *c2dev = to_c2dev(ibqp->device); - struct c2_qp *qp = to_c2qp(ibqp); - union c2wr wr; - unsigned long lock_flags; - int err = 0; - - if (qp->state > IB_QPS_RTS) { - err = -EINVAL; - goto out; - } - - /* - * Try and post each work request - */ - while (ib_wr) { - u32 tot_len; - u8 actual_sge_count; - - if (ib_wr->num_sge > qp->recv_sgl_depth) { - err = -EINVAL; - break; - } - - /* - * Create local host-copy of the WR - */ - wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id; - c2_wr_set_id(&wr, CCWR_RECV); - c2_wr_set_flags(&wr, 0); - - /* sge_count is limited to eight bits. */ - BUG_ON(ib_wr->num_sge >= 256); - err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data), - ib_wr->sg_list, - ib_wr->num_sge, &tot_len, &actual_sge_count); - c2_wr_set_sge_count(&wr, actual_sge_count); - - /* - * If we had an error on the last wr build, then - * break out. Possible errors include bogus WR - * type, and a bogus SGL length... - */ - if (err) { - break; - } - - spin_lock_irqsave(&qp->lock, lock_flags); - err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size); - if (err) { - spin_unlock_irqrestore(&qp->lock, lock_flags); - break; - } - - /* - * Enqueue mq index to activity FIFO - */ - c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count); - spin_unlock_irqrestore(&qp->lock, lock_flags); - - ib_wr = ib_wr->next; - } - -out: - if (err) - *bad_wr = ib_wr; - return err; -} - -void c2_init_qp_table(struct c2_dev *c2dev) -{ - spin_lock_init(&c2dev->qp_table.lock); - idr_init(&c2dev->qp_table.idr); -} - -void c2_cleanup_qp_table(struct c2_dev *c2dev) -{ - idr_destroy(&c2dev->qp_table.idr); -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_rnic.c b/kernel/drivers/infiniband/hw/amso1100/c2_rnic.c deleted file mode 100644 index d2a6d9613..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_rnic.c +++ /dev/null @@ -1,655 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/pci.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/if_vlan.h> -#include <linux/crc32.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/init.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/inet.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> - -#include <linux/route.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> -#include <rdma/ib_smi.h> -#include "c2.h" -#include "c2_vq.h" - -/* Device capabilities */ -#define C2_MIN_PAGESIZE 1024 - -#define C2_MAX_MRS 32768 -#define C2_MAX_QPS 16000 -#define C2_MAX_WQE_SZ 256 -#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ) -#define C2_MAX_SGES 4 -#define C2_MAX_SGE_RD 1 -#define C2_MAX_CQS 32768 -#define C2_MAX_CQES 4096 -#define C2_MAX_PDS 16384 - -/* - * Send the adapter INIT message to the amso1100 - */ -static int c2_adapter_init(struct c2_dev *c2dev) -{ - struct c2wr_init_req wr; - int err; - - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_INIT); - wr.hdr.context = 0; - wr.hint_count = cpu_to_be64(c2dev->hint_count_dma); - wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma); - wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma); - wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma); - wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma); - wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma); - - /* Post the init message */ - err = vq_send_wr(c2dev, (union c2wr *) & wr); - - return err; -} - -/* - * Send the adapter TERM message to the amso1100 - */ -static void c2_adapter_term(struct c2_dev *c2dev) -{ - struct c2wr_init_req wr; - - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_TERM); - wr.hdr.context = 0; - - /* Post the init message */ - vq_send_wr(c2dev, (union c2wr *) & wr); - c2dev->init = 0; - - return; -} - -/* - * Query the adapter - */ -static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props) -{ - struct c2_vq_req *vq_req; - struct c2wr_rnic_query_req wr; - struct c2wr_rnic_query_rep *reply; - int err; - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - c2_wr_set_id(&wr, CCWR_RNIC_QUERY); - wr.hdr.context = (unsigned long) vq_req; - wr.rnic_handle = c2dev->adapter_handle; - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) &wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail1; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail1; - - reply = - (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) - err = -ENOMEM; - else - err = c2_errno(reply); - if (err) - goto bail2; - - props->fw_ver = - ((u64)be32_to_cpu(reply->fw_ver_major) << 32) | - ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) | - (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF); - memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6); - props->max_mr_size = 0xFFFFFFFF; - props->page_size_cap = ~(C2_MIN_PAGESIZE-1); - props->vendor_id = be32_to_cpu(reply->vendor_id); - props->vendor_part_id = be32_to_cpu(reply->part_number); - props->hw_ver = be32_to_cpu(reply->hw_version); - props->max_qp = be32_to_cpu(reply->max_qps); - props->max_qp_wr = be32_to_cpu(reply->max_qp_depth); - props->device_cap_flags = c2dev->device_cap_flags; - props->max_sge = C2_MAX_SGES; - props->max_sge_rd = C2_MAX_SGE_RD; - props->max_cq = be32_to_cpu(reply->max_cqs); - props->max_cqe = be32_to_cpu(reply->max_cq_depth); - props->max_mr = be32_to_cpu(reply->max_mrs); - props->max_pd = be32_to_cpu(reply->max_pds); - props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird); - props->max_ee_rd_atom = 0; - props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird); - props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord); - props->max_ee_init_rd_atom = 0; - props->atomic_cap = IB_ATOMIC_NONE; - props->max_ee = 0; - props->max_rdd = 0; - props->max_mw = be32_to_cpu(reply->max_mws); - props->max_raw_ipv6_qp = 0; - props->max_raw_ethy_qp = 0; - props->max_mcast_grp = 0; - props->max_mcast_qp_attach = 0; - props->max_total_mcast_qp_attach = 0; - props->max_ah = 0; - props->max_fmr = 0; - props->max_map_per_fmr = 0; - props->max_srq = 0; - props->max_srq_wr = 0; - props->max_srq_sge = 0; - props->max_pkeys = 0; - props->local_ca_ack_delay = 0; - - bail2: - vq_repbuf_free(c2dev, reply); - - bail1: - vq_req_free(c2dev, vq_req); - return err; -} - -/* - * Add an IP address to the RNIC interface - */ -int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask) -{ - struct c2_vq_req *vq_req; - struct c2wr_rnic_setconfig_req *wr; - struct c2wr_rnic_setconfig_rep *reply; - struct c2_netaddr netaddr; - int err, len; - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - len = sizeof(struct c2_netaddr); - wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); - if (!wr) { - err = -ENOMEM; - goto bail0; - } - - c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); - wr->hdr.context = (unsigned long) vq_req; - wr->rnic_handle = c2dev->adapter_handle; - wr->option = cpu_to_be32(C2_CFG_ADD_ADDR); - - netaddr.ip_addr = inaddr; - netaddr.netmask = inmask; - netaddr.mtu = 0; - - memcpy(wr->data, &netaddr, len); - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail1; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail1; - - reply = - (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail1; - } - - err = c2_errno(reply); - vq_repbuf_free(c2dev, reply); - - bail1: - kfree(wr); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -/* - * Delete an IP address from the RNIC interface - */ -int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask) -{ - struct c2_vq_req *vq_req; - struct c2wr_rnic_setconfig_req *wr; - struct c2wr_rnic_setconfig_rep *reply; - struct c2_netaddr netaddr; - int err, len; - - vq_req = vq_req_alloc(c2dev); - if (!vq_req) - return -ENOMEM; - - len = sizeof(struct c2_netaddr); - wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); - if (!wr) { - err = -ENOMEM; - goto bail0; - } - - c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); - wr->hdr.context = (unsigned long) vq_req; - wr->rnic_handle = c2dev->adapter_handle; - wr->option = cpu_to_be32(C2_CFG_DEL_ADDR); - - netaddr.ip_addr = inaddr; - netaddr.netmask = inmask; - netaddr.mtu = 0; - - memcpy(wr->data, &netaddr, len); - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, (union c2wr *) wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail1; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) - goto bail1; - - reply = - (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail1; - } - - err = c2_errno(reply); - vq_repbuf_free(c2dev, reply); - - bail1: - kfree(wr); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -/* - * Open a single RNIC instance to use with all - * low level openib calls - */ -static int c2_rnic_open(struct c2_dev *c2dev) -{ - struct c2_vq_req *vq_req; - union c2wr wr; - struct c2wr_rnic_open_rep *reply; - int err; - - vq_req = vq_req_alloc(c2dev); - if (vq_req == NULL) { - return -ENOMEM; - } - - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_RNIC_OPEN); - wr.rnic_open.req.hdr.context = (unsigned long) (vq_req); - wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE); - wr.rnic_open.req.port_num = cpu_to_be16(0); - wr.rnic_open.req.user_context = (unsigned long) c2dev; - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, &wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail0; - } - - reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - if ((err = c2_errno(reply)) != 0) { - goto bail1; - } - - c2dev->adapter_handle = reply->rnic_handle; - - bail1: - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -/* - * Close the RNIC instance - */ -static int c2_rnic_close(struct c2_dev *c2dev) -{ - struct c2_vq_req *vq_req; - union c2wr wr; - struct c2wr_rnic_close_rep *reply; - int err; - - vq_req = vq_req_alloc(c2dev); - if (vq_req == NULL) { - return -ENOMEM; - } - - memset(&wr, 0, sizeof(wr)); - c2_wr_set_id(&wr, CCWR_RNIC_CLOSE); - wr.rnic_close.req.hdr.context = (unsigned long) vq_req; - wr.rnic_close.req.rnic_handle = c2dev->adapter_handle; - - vq_req_get(c2dev, vq_req); - - err = vq_send_wr(c2dev, &wr); - if (err) { - vq_req_put(c2dev, vq_req); - goto bail0; - } - - err = vq_wait_for_reply(c2dev, vq_req); - if (err) { - goto bail0; - } - - reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg); - if (!reply) { - err = -ENOMEM; - goto bail0; - } - - if ((err = c2_errno(reply)) != 0) { - goto bail1; - } - - c2dev->adapter_handle = 0; - - bail1: - vq_repbuf_free(c2dev, reply); - bail0: - vq_req_free(c2dev, vq_req); - return err; -} - -/* - * Called by c2_probe to initialize the RNIC. This principally - * involves initializing the various limits and resource pools that - * comprise the RNIC instance. - */ -int c2_rnic_init(struct c2_dev *c2dev) -{ - int err; - u32 qsize, msgsize; - void *q1_pages; - void *q2_pages; - void __iomem *mmio_regs; - - /* Device capabilities */ - c2dev->device_cap_flags = - (IB_DEVICE_RESIZE_MAX_WR | - IB_DEVICE_CURR_QP_STATE_MOD | - IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW); - - /* Allocate the qptr_array */ - c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *)); - if (!c2dev->qptr_array) { - return -ENOMEM; - } - - /* Initialize the qptr_array */ - c2dev->qptr_array[0] = (void *) &c2dev->req_vq; - c2dev->qptr_array[1] = (void *) &c2dev->rep_vq; - c2dev->qptr_array[2] = (void *) &c2dev->aeq; - - /* Initialize data structures */ - init_waitqueue_head(&c2dev->req_vq_wo); - spin_lock_init(&c2dev->vqlock); - spin_lock_init(&c2dev->lock); - - /* Allocate MQ shared pointer pool for kernel clients. User - * mode client pools are hung off the user context - */ - err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool); - if (err) { - goto bail0; - } - - /* Allocate shared pointers for Q0, Q1, and Q2 from - * the shared pointer pool. - */ - - c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &c2dev->hint_count_dma, - GFP_KERNEL); - c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &c2dev->req_vq.shared_dma, - GFP_KERNEL); - c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &c2dev->rep_vq.shared_dma, - GFP_KERNEL); - c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, - &c2dev->aeq.shared_dma, GFP_KERNEL); - if (!c2dev->hint_count || !c2dev->req_vq.shared || - !c2dev->rep_vq.shared || !c2dev->aeq.shared) { - err = -ENOMEM; - goto bail1; - } - - mmio_regs = c2dev->kva; - /* Initialize the Verbs Request Queue */ - c2_mq_req_init(&c2dev->req_vq, 0, - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)), - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)), - mmio_regs + - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)), - mmio_regs + - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)), - C2_MQ_ADAPTER_TARGET); - - /* Initialize the Verbs Reply Queue */ - qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE)); - msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE)); - q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, - &c2dev->rep_vq.host_dma, GFP_KERNEL); - if (!q1_pages) { - err = -ENOMEM; - goto bail1; - } - dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); - pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages, - (unsigned long long) c2dev->rep_vq.host_dma); - c2_mq_rep_init(&c2dev->rep_vq, - 1, - qsize, - msgsize, - q1_pages, - mmio_regs + - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)), - C2_MQ_HOST_TARGET); - - /* Initialize the Asynchronus Event Queue */ - qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE)); - msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE)); - q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, - &c2dev->aeq.host_dma, GFP_KERNEL); - if (!q2_pages) { - err = -ENOMEM; - goto bail2; - } - dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); - pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages, - (unsigned long long) c2dev->aeq.host_dma); - c2_mq_rep_init(&c2dev->aeq, - 2, - qsize, - msgsize, - q2_pages, - mmio_regs + - be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)), - C2_MQ_HOST_TARGET); - - /* Initialize the verbs request allocator */ - err = vq_init(c2dev); - if (err) - goto bail3; - - /* Enable interrupts on the adapter */ - writel(0, c2dev->regs + C2_IDIS); - - /* create the WR init message */ - err = c2_adapter_init(c2dev); - if (err) - goto bail4; - c2dev->init++; - - /* open an adapter instance */ - err = c2_rnic_open(c2dev); - if (err) - goto bail4; - - /* Initialize cached the adapter limits */ - err = c2_rnic_query(c2dev, &c2dev->props); - if (err) - goto bail5; - - /* Initialize the PD pool */ - err = c2_init_pd_table(c2dev); - if (err) - goto bail5; - - /* Initialize the QP pool */ - c2_init_qp_table(c2dev); - return 0; - - bail5: - c2_rnic_close(c2dev); - bail4: - vq_term(c2dev); - bail3: - dma_free_coherent(&c2dev->pcidev->dev, - c2dev->aeq.q_size * c2dev->aeq.msg_size, - q2_pages, dma_unmap_addr(&c2dev->aeq, mapping)); - bail2: - dma_free_coherent(&c2dev->pcidev->dev, - c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, - q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping)); - bail1: - c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); - bail0: - vfree(c2dev->qptr_array); - - return err; -} - -/* - * Called by c2_remove to cleanup the RNIC resources. - */ -void c2_rnic_term(struct c2_dev *c2dev) -{ - - /* Close the open adapter instance */ - c2_rnic_close(c2dev); - - /* Send the TERM message to the adapter */ - c2_adapter_term(c2dev); - - /* Disable interrupts on the adapter */ - writel(1, c2dev->regs + C2_IDIS); - - /* Free the QP pool */ - c2_cleanup_qp_table(c2dev); - - /* Free the PD pool */ - c2_cleanup_pd_table(c2dev); - - /* Free the verbs request allocator */ - vq_term(c2dev); - - /* Free the asynchronus event queue */ - dma_free_coherent(&c2dev->pcidev->dev, - c2dev->aeq.q_size * c2dev->aeq.msg_size, - c2dev->aeq.msg_pool.host, - dma_unmap_addr(&c2dev->aeq, mapping)); - - /* Free the verbs reply queue */ - dma_free_coherent(&c2dev->pcidev->dev, - c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, - c2dev->rep_vq.msg_pool.host, - dma_unmap_addr(&c2dev->rep_vq, mapping)); - - /* Free the MQ shared pointer pool */ - c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); - - /* Free the qptr_array */ - vfree(c2dev->qptr_array); - - return; -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_status.h b/kernel/drivers/infiniband/hw/amso1100/c2_status.h deleted file mode 100644 index 6ee4aa92d..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_status.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _C2_STATUS_H_ -#define _C2_STATUS_H_ - -/* - * Verbs Status Codes - */ -enum c2_status { - C2_OK = 0, /* This must be zero */ - CCERR_INSUFFICIENT_RESOURCES = 1, - CCERR_INVALID_MODIFIER = 2, - CCERR_INVALID_MODE = 3, - CCERR_IN_USE = 4, - CCERR_INVALID_RNIC = 5, - CCERR_INTERRUPTED_OPERATION = 6, - CCERR_INVALID_EH = 7, - CCERR_INVALID_CQ = 8, - CCERR_CQ_EMPTY = 9, - CCERR_NOT_IMPLEMENTED = 10, - CCERR_CQ_DEPTH_TOO_SMALL = 11, - CCERR_PD_IN_USE = 12, - CCERR_INVALID_PD = 13, - CCERR_INVALID_SRQ = 14, - CCERR_INVALID_ADDRESS = 15, - CCERR_INVALID_NETMASK = 16, - CCERR_INVALID_QP = 17, - CCERR_INVALID_QP_STATE = 18, - CCERR_TOO_MANY_WRS_POSTED = 19, - CCERR_INVALID_WR_TYPE = 20, - CCERR_INVALID_SGL_LENGTH = 21, - CCERR_INVALID_SQ_DEPTH = 22, - CCERR_INVALID_RQ_DEPTH = 23, - CCERR_INVALID_ORD = 24, - CCERR_INVALID_IRD = 25, - CCERR_QP_ATTR_CANNOT_CHANGE = 26, - CCERR_INVALID_STAG = 27, - CCERR_QP_IN_USE = 28, - CCERR_OUTSTANDING_WRS = 29, - CCERR_STAG_IN_USE = 30, - CCERR_INVALID_STAG_INDEX = 31, - CCERR_INVALID_SGL_FORMAT = 32, - CCERR_ADAPTER_TIMEOUT = 33, - CCERR_INVALID_CQ_DEPTH = 34, - CCERR_INVALID_PRIVATE_DATA_LENGTH = 35, - CCERR_INVALID_EP = 36, - CCERR_MR_IN_USE = CCERR_STAG_IN_USE, - CCERR_FLUSHED = 38, - CCERR_INVALID_WQE = 39, - CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40, - CCERR_REMOTE_TERMINATION_ERROR = 41, - CCERR_BASE_AND_BOUNDS_VIOLATION = 42, - CCERR_ACCESS_VIOLATION = 43, - CCERR_INVALID_PD_ID = 44, - CCERR_WRAP_ERROR = 45, - CCERR_INV_STAG_ACCESS_ERROR = 46, - CCERR_ZERO_RDMA_READ_RESOURCES = 47, - CCERR_QP_NOT_PRIVILEGED = 48, - CCERR_STAG_STATE_NOT_INVALID = 49, - CCERR_INVALID_PAGE_SIZE = 50, - CCERR_INVALID_BUFFER_SIZE = 51, - CCERR_INVALID_PBE = 52, - CCERR_INVALID_FBO = 53, - CCERR_INVALID_LENGTH = 54, - CCERR_INVALID_ACCESS_RIGHTS = 55, - CCERR_PBL_TOO_BIG = 56, - CCERR_INVALID_VA = 57, - CCERR_INVALID_REGION = 58, - CCERR_INVALID_WINDOW = 59, - CCERR_TOTAL_LENGTH_TOO_BIG = 60, - CCERR_INVALID_QP_ID = 61, - CCERR_ADDR_IN_USE = 62, - CCERR_ADDR_NOT_AVAIL = 63, - CCERR_NET_DOWN = 64, - CCERR_NET_UNREACHABLE = 65, - CCERR_CONN_ABORTED = 66, - CCERR_CONN_RESET = 67, - CCERR_NO_BUFS = 68, - CCERR_CONN_TIMEDOUT = 69, - CCERR_CONN_REFUSED = 70, - CCERR_HOST_UNREACHABLE = 71, - CCERR_INVALID_SEND_SGL_DEPTH = 72, - CCERR_INVALID_RECV_SGL_DEPTH = 73, - CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74, - CCERR_INSUFFICIENT_PRIVILEGES = 75, - CCERR_STACK_ERROR = 76, - CCERR_INVALID_VERSION = 77, - CCERR_INVALID_MTU = 78, - CCERR_INVALID_IMAGE = 79, - CCERR_PENDING = 98, /* not an error; user internally by adapter */ - CCERR_DEFER = 99, /* not an error; used internally by adapter */ - CCERR_FAILED_WRITE = 100, - CCERR_FAILED_ERASE = 101, - CCERR_FAILED_VERIFICATION = 102, - CCERR_NOT_FOUND = 103, - -}; - -/* - * CCAE_ACTIVE_CONNECT_RESULTS status result codes. - */ -enum c2_connect_status { - C2_CONN_STATUS_SUCCESS = C2_OK, - C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES, - C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT, - C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED, - C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE, - C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE, - C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC, - C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP, - C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE, - C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET, - C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL, -}; - -/* - * Flash programming status codes. - */ -enum c2_flash_status { - C2_FLASH_STATUS_SUCCESS = 0x0000, - C2_FLASH_STATUS_VERIFY_ERR = 0x0002, - C2_FLASH_STATUS_IMAGE_ERR = 0x0004, - C2_FLASH_STATUS_ECLBS = 0x0400, - C2_FLASH_STATUS_PSLBS = 0x0800, - C2_FLASH_STATUS_VPENS = 0x1000, -}; - -#endif /* _C2_STATUS_H_ */ diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_user.h b/kernel/drivers/infiniband/hw/amso1100/c2_user.h deleted file mode 100644 index 7e9e7ad65..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_user.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef C2_USER_H -#define C2_USER_H - -#include <linux/types.h> - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. - */ - -struct c2_alloc_ucontext_resp { - __u32 qp_tab_size; - __u32 uarc_size; -}; - -struct c2_alloc_pd_resp { - __u32 pdn; - __u32 reserved; -}; - -struct c2_create_cq { - __u32 lkey; - __u32 pdn; - __u64 arm_db_page; - __u64 set_db_page; - __u32 arm_db_index; - __u32 set_db_index; -}; - -struct c2_create_cq_resp { - __u32 cqn; - __u32 reserved; -}; - -struct c2_create_qp { - __u32 lkey; - __u32 reserved; - __u64 sq_db_page; - __u64 rq_db_page; - __u32 sq_db_index; - __u32 rq_db_index; -}; - -#endif /* C2_USER_H */ diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_vq.c b/kernel/drivers/infiniband/hw/amso1100/c2_vq.c deleted file mode 100644 index 2ec716fb2..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_vq.c +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/slab.h> -#include <linux/spinlock.h> - -#include "c2_vq.h" -#include "c2_provider.h" - -/* - * Verbs Request Objects: - * - * VQ Request Objects are allocated by the kernel verbs handlers. - * They contain a wait object, a refcnt, an atomic bool indicating that the - * adapter has replied, and a copy of the verb reply work request. - * A pointer to the VQ Request Object is passed down in the context - * field of the work request message, and reflected back by the adapter - * in the verbs reply message. The function handle_vq() in the interrupt - * path will use this pointer to: - * 1) append a copy of the verbs reply message - * 2) mark that the reply is ready - * 3) wake up the kernel verbs handler blocked awaiting the reply. - * - * - * The kernel verbs handlers do a "get" to put a 2nd reference on the - * VQ Request object. If the kernel verbs handler exits before the adapter - * can respond, this extra reference will keep the VQ Request object around - * until the adapter's reply can be processed. The reason we need this is - * because a pointer to this object is stuffed into the context field of - * the verbs work request message, and reflected back in the reply message. - * It is used in the interrupt handler (handle_vq()) to wake up the appropriate - * kernel verb handler that is blocked awaiting the verb reply. - * So handle_vq() will do a "put" on the object when it's done accessing it. - * NOTE: If we guarantee that the kernel verb handler will never bail before - * getting the reply, then we don't need these refcnts. - * - * - * VQ Request objects are freed by the kernel verbs handlers only - * after the verb has been processed, or when the adapter fails and - * does not reply. - * - * - * Verbs Reply Buffers: - * - * VQ Reply bufs are local host memory copies of a - * outstanding Verb Request reply - * message. The are always allocated by the kernel verbs handlers, and _may_ be - * freed by either the kernel verbs handler -or- the interrupt handler. The - * kernel verbs handler _must_ free the repbuf, then free the vq request object - * in that order. - */ - -int vq_init(struct c2_dev *c2dev) -{ - sprintf(c2dev->vq_cache_name, "c2-vq:dev%c", - (char) ('0' + c2dev->devnum)); - c2dev->host_msg_cache = - kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0, - SLAB_HWCACHE_ALIGN, NULL); - if (c2dev->host_msg_cache == NULL) { - return -ENOMEM; - } - return 0; -} - -void vq_term(struct c2_dev *c2dev) -{ - kmem_cache_destroy(c2dev->host_msg_cache); -} - -/* vq_req_alloc - allocate a VQ Request Object and initialize it. - * The refcnt is set to 1. - */ -struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev) -{ - struct c2_vq_req *r; - - r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL); - if (r) { - init_waitqueue_head(&r->wait_object); - r->reply_msg = 0; - r->event = 0; - r->cm_id = NULL; - r->qp = NULL; - atomic_set(&r->refcnt, 1); - atomic_set(&r->reply_ready, 0); - } - return r; -} - - -/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler - * has already free the VQ Reply Buffer if it existed. - */ -void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r) -{ - r->reply_msg = 0; - if (atomic_dec_and_test(&r->refcnt)) { - kfree(r); - } -} - -/* vq_req_get - reference a VQ Request Object. Done - * only in the kernel verbs handlers. - */ -void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r) -{ - atomic_inc(&r->refcnt); -} - - -/* vq_req_put - dereference and potentially free a VQ Request Object. - * - * This is only called by handle_vq() on the - * interrupt when it is done processing - * a verb reply message. If the associated - * kernel verbs handler has already bailed, - * then this put will actually free the VQ - * Request object _and_ the VQ Reply Buffer - * if it exists. - */ -void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) -{ - if (atomic_dec_and_test(&r->refcnt)) { - if (r->reply_msg != 0) - vq_repbuf_free(c2dev, - (void *) (unsigned long) r->reply_msg); - kfree(r); - } -} - - -/* - * vq_repbuf_alloc - allocate a VQ Reply Buffer. - */ -void *vq_repbuf_alloc(struct c2_dev *c2dev) -{ - return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC); -} - -/* - * vq_send_wr - post a verbs request message to the Verbs Request Queue. - * If a message is not available in the MQ, then block until one is available. - * NOTE: handle_mq() on the interrupt context will wake up threads blocked here. - * When the adapter drains the Verbs Request Queue, - * it inserts MQ index 0 in to the - * adapter->host activity fifo and interrupts the host. - */ -int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr) -{ - void *msg; - wait_queue_t __wait; - - /* - * grab adapter vq lock - */ - spin_lock(&c2dev->vqlock); - - /* - * allocate msg - */ - msg = c2_mq_alloc(&c2dev->req_vq); - - /* - * If we cannot get a msg, then we'll wait - * When a messages are available, the int handler will wake_up() - * any waiters. - */ - while (msg == NULL) { - pr_debug("%s:%d no available msg in VQ, waiting...\n", - __func__, __LINE__); - init_waitqueue_entry(&__wait, current); - add_wait_queue(&c2dev->req_vq_wo, &__wait); - spin_unlock(&c2dev->vqlock); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (!c2_mq_full(&c2dev->req_vq)) { - break; - } - if (!signal_pending(current)) { - schedule_timeout(1 * HZ); /* 1 second... */ - continue; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&c2dev->req_vq_wo, &__wait); - return -EINTR; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&c2dev->req_vq_wo, &__wait); - spin_lock(&c2dev->vqlock); - msg = c2_mq_alloc(&c2dev->req_vq); - } - - /* - * copy wr into adapter msg - */ - memcpy(msg, wr, c2dev->req_vq.msg_size); - - /* - * post msg - */ - c2_mq_produce(&c2dev->req_vq); - - /* - * release adapter vq lock - */ - spin_unlock(&c2dev->vqlock); - return 0; -} - - -/* - * vq_wait_for_reply - block until the adapter posts a Verb Reply Message. - */ -int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req) -{ - if (!wait_event_timeout(req->wait_object, - atomic_read(&req->reply_ready), - 60*HZ)) - return -ETIMEDOUT; - - return 0; -} - -/* - * vq_repbuf_free - Free a Verbs Reply Buffer. - */ -void vq_repbuf_free(struct c2_dev *c2dev, void *reply) -{ - kmem_cache_free(c2dev->host_msg_cache, reply); -} diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_vq.h b/kernel/drivers/infiniband/hw/amso1100/c2_vq.h deleted file mode 100644 index 33805627a..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_vq.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _C2_VQ_H_ -#define _C2_VQ_H_ -#include <linux/sched.h> -#include "c2.h" -#include "c2_wr.h" -#include "c2_provider.h" - -struct c2_vq_req { - u64 reply_msg; /* ptr to reply msg */ - wait_queue_head_t wait_object; /* wait object for vq reqs */ - atomic_t reply_ready; /* set when reply is ready */ - atomic_t refcnt; /* used to cancel WRs... */ - int event; - struct iw_cm_id *cm_id; - struct c2_qp *qp; -}; - -extern int vq_init(struct c2_dev *c2dev); -extern void vq_term(struct c2_dev *c2dev); - -extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev); -extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req); -extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req); -extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req); -extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr); - -extern void *vq_repbuf_alloc(struct c2_dev *c2dev); -extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply); - -extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req); -#endif /* _C2_VQ_H_ */ diff --git a/kernel/drivers/infiniband/hw/amso1100/c2_wr.h b/kernel/drivers/infiniband/hw/amso1100/c2_wr.h deleted file mode 100644 index 8d4b4ca46..000000000 --- a/kernel/drivers/infiniband/hw/amso1100/c2_wr.h +++ /dev/null @@ -1,1520 +0,0 @@ -/* - * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _C2_WR_H_ -#define _C2_WR_H_ - -#ifdef CCDEBUG -#define CCWR_MAGIC 0xb07700b0 -#endif - -#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF - -/* Maximum allowed size in bytes of private_data exchange - * on connect. - */ -#define C2_MAX_PRIVATE_DATA_SIZE 200 - -/* - * These types are shared among the adapter, host, and CCIL consumer. - */ -enum c2_cq_notification_type { - C2_CQ_NOTIFICATION_TYPE_NONE = 1, - C2_CQ_NOTIFICATION_TYPE_NEXT, - C2_CQ_NOTIFICATION_TYPE_NEXT_SE -}; - -enum c2_setconfig_cmd { - C2_CFG_ADD_ADDR = 1, - C2_CFG_DEL_ADDR = 2, - C2_CFG_ADD_ROUTE = 3, - C2_CFG_DEL_ROUTE = 4 -}; - -enum c2_getconfig_cmd { - C2_GETCONFIG_ROUTES = 1, - C2_GETCONFIG_ADDRS -}; - -/* - * CCIL Work Request Identifiers - */ -enum c2wr_ids { - CCWR_RNIC_OPEN = 1, - CCWR_RNIC_QUERY, - CCWR_RNIC_SETCONFIG, - CCWR_RNIC_GETCONFIG, - CCWR_RNIC_CLOSE, - CCWR_CQ_CREATE, - CCWR_CQ_QUERY, - CCWR_CQ_MODIFY, - CCWR_CQ_DESTROY, - CCWR_QP_CONNECT, - CCWR_PD_ALLOC, - CCWR_PD_DEALLOC, - CCWR_SRQ_CREATE, - CCWR_SRQ_QUERY, - CCWR_SRQ_MODIFY, - CCWR_SRQ_DESTROY, - CCWR_QP_CREATE, - CCWR_QP_QUERY, - CCWR_QP_MODIFY, - CCWR_QP_DESTROY, - CCWR_NSMR_STAG_ALLOC, - CCWR_NSMR_REGISTER, - CCWR_NSMR_PBL, - CCWR_STAG_DEALLOC, - CCWR_NSMR_REREGISTER, - CCWR_SMR_REGISTER, - CCWR_MR_QUERY, - CCWR_MW_ALLOC, - CCWR_MW_QUERY, - CCWR_EP_CREATE, - CCWR_EP_GETOPT, - CCWR_EP_SETOPT, - CCWR_EP_DESTROY, - CCWR_EP_BIND, - CCWR_EP_CONNECT, - CCWR_EP_LISTEN, - CCWR_EP_SHUTDOWN, - CCWR_EP_LISTEN_CREATE, - CCWR_EP_LISTEN_DESTROY, - CCWR_EP_QUERY, - CCWR_CR_ACCEPT, - CCWR_CR_REJECT, - CCWR_CONSOLE, - CCWR_TERM, - CCWR_FLASH_INIT, - CCWR_FLASH, - CCWR_BUF_ALLOC, - CCWR_BUF_FREE, - CCWR_FLASH_WRITE, - CCWR_INIT, /* WARNING: Don't move this ever again! */ - - - - /* Add new IDs here */ - - - - /* - * WARNING: CCWR_LAST must always be the last verbs id defined! - * All the preceding IDs are fixed, and must not change. - * You can add new IDs, but must not remove or reorder - * any IDs. If you do, YOU will ruin any hope of - * compatibility between versions. - */ - CCWR_LAST, - - /* - * Start over at 1 so that arrays indexed by user wr id's - * begin at 1. This is OK since the verbs and user wr id's - * are always used on disjoint sets of queues. - */ - /* - * The order of the CCWR_SEND_XX verbs must - * match the order of the RDMA_OPs - */ - CCWR_SEND = 1, - CCWR_SEND_INV, - CCWR_SEND_SE, - CCWR_SEND_SE_INV, - CCWR_RDMA_WRITE, - CCWR_RDMA_READ, - CCWR_RDMA_READ_INV, - CCWR_MW_BIND, - CCWR_NSMR_FASTREG, - CCWR_STAG_INVALIDATE, - CCWR_RECV, - CCWR_NOP, - CCWR_UNIMPL, -/* WARNING: This must always be the last user wr id defined! */ -}; -#define RDMA_SEND_OPCODE_FROM_WR_ID(x) (x+2) - -/* - * SQ/RQ Work Request Types - */ -enum c2_wr_type { - C2_WR_TYPE_SEND = CCWR_SEND, - C2_WR_TYPE_SEND_SE = CCWR_SEND_SE, - C2_WR_TYPE_SEND_INV = CCWR_SEND_INV, - C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV, - C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE, - C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ, - C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV, - C2_WR_TYPE_BIND_MW = CCWR_MW_BIND, - C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG, - C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE, - C2_WR_TYPE_RECV = CCWR_RECV, - C2_WR_TYPE_NOP = CCWR_NOP, -}; - -struct c2_netaddr { - __be32 ip_addr; - __be32 netmask; - u32 mtu; -}; - -struct c2_route { - u32 ip_addr; /* 0 indicates the default route */ - u32 netmask; /* netmask associated with dst */ - u32 flags; - union { - u32 ipaddr; /* address of the nexthop interface */ - u8 enaddr[6]; - } nexthop; -}; - -/* - * A Scatter Gather Entry. - */ -struct c2_data_addr { - __be32 stag; - __be32 length; - __be64 to; -}; - -/* - * MR and MW flags used by the consumer, RI, and RNIC. - */ -enum c2_mm_flags { - MEM_REMOTE = 0x0001, /* allow mw binds with remote access. */ - MEM_VA_BASED = 0x0002, /* Not Zero-based */ - MEM_PBL_COMPLETE = 0x0004, /* PBL array is complete in this msg */ - MEM_LOCAL_READ = 0x0008, /* allow local reads */ - MEM_LOCAL_WRITE = 0x0010, /* allow local writes */ - MEM_REMOTE_READ = 0x0020, /* allow remote reads */ - MEM_REMOTE_WRITE = 0x0040, /* allow remote writes */ - MEM_WINDOW_BIND = 0x0080, /* binds allowed */ - MEM_SHARED = 0x0100, /* set if MR is shared */ - MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */ -}; - -/* - * CCIL API ACF flags defined in terms of the low level mem flags. - * This minimizes translation needed in the user API - */ -enum c2_acf { - C2_ACF_LOCAL_READ = MEM_LOCAL_READ, - C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE, - C2_ACF_REMOTE_READ = MEM_REMOTE_READ, - C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE, - C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND -}; - -/* - * Image types of objects written to flash - */ -#define C2_FLASH_IMG_BITFILE 1 -#define C2_FLASH_IMG_OPTION_ROM 2 -#define C2_FLASH_IMG_VPD 3 - -/* - * to fix bug 1815 we define the max size allowable of the - * terminate message (per the IETF spec).Refer to the IETF - * protocol specification, section 12.1.6, page 64) - * The message is prefixed by 20 types of DDP info. - * - * Then the message has 6 bytes for the terminate control - * and DDP segment length info plus a DDP header (either - * 14 or 18 byts) plus 28 bytes for the RDMA header. - * Thus the max size in: - * 20 + (6 + 18 + 28) = 72 - */ -#define C2_MAX_TERMINATE_MESSAGE_SIZE (72) - -/* - * Build String Length. It must be the same as C2_BUILD_STR_LEN in ccil_api.h - */ -#define WR_BUILD_STR_LEN 64 - -/* - * WARNING: All of these structs need to align any 64bit types on - * 64 bit boundaries! 64bit types include u64 and u64. - */ - -/* - * Clustercore Work Request Header. Be sensitive to field layout - * and alignment. - */ -struct c2wr_hdr { - /* wqe_count is part of the cqe. It is put here so the - * adapter can write to it while the wr is pending without - * clobbering part of the wr. This word need not be dma'd - * from the host to adapter by libccil, but we copy it anyway - * to make the memcpy to the adapter better aligned. - */ - __be32 wqe_count; - - /* Put these fields next so that later 32- and 64-bit - * quantities are naturally aligned. - */ - u8 id; - u8 result; /* adapter -> host */ - u8 sge_count; /* host -> adapter */ - u8 flags; /* host -> adapter */ - - u64 context; -#ifdef CCMSGMAGIC - u32 magic; - u32 pad; -#endif -} __attribute__((packed)); - -/* - *------------------------ RNIC ------------------------ - */ - -/* - * WR_RNIC_OPEN - */ - -/* - * Flags for the RNIC WRs - */ -enum c2_rnic_flags { - RNIC_IRD_STATIC = 0x0001, - RNIC_ORD_STATIC = 0x0002, - RNIC_QP_STATIC = 0x0004, - RNIC_SRQ_SUPPORTED = 0x0008, - RNIC_PBL_BLOCK_MODE = 0x0010, - RNIC_SRQ_MODEL_ARRIVAL = 0x0020, - RNIC_CQ_OVF_DETECTED = 0x0040, - RNIC_PRIV_MODE = 0x0080 -}; - -struct c2wr_rnic_open_req { - struct c2wr_hdr hdr; - u64 user_context; - __be16 flags; /* See enum c2_rnic_flags */ - __be16 port_num; -} __attribute__((packed)); - -struct c2wr_rnic_open_rep { - struct c2wr_hdr hdr; - u32 rnic_handle; -} __attribute__((packed)); - -union c2wr_rnic_open { - struct c2wr_rnic_open_req req; - struct c2wr_rnic_open_rep rep; -} __attribute__((packed)); - -struct c2wr_rnic_query_req { - struct c2wr_hdr hdr; - u32 rnic_handle; -} __attribute__((packed)); - -/* - * WR_RNIC_QUERY - */ -struct c2wr_rnic_query_rep { - struct c2wr_hdr hdr; - u64 user_context; - __be32 vendor_id; - __be32 part_number; - __be32 hw_version; - __be32 fw_ver_major; - __be32 fw_ver_minor; - __be32 fw_ver_patch; - char fw_ver_build_str[WR_BUILD_STR_LEN]; - __be32 max_qps; - __be32 max_qp_depth; - u32 max_srq_depth; - u32 max_send_sgl_depth; - u32 max_rdma_sgl_depth; - __be32 max_cqs; - __be32 max_cq_depth; - u32 max_cq_event_handlers; - __be32 max_mrs; - u32 max_pbl_depth; - __be32 max_pds; - __be32 max_global_ird; - u32 max_global_ord; - __be32 max_qp_ird; - __be32 max_qp_ord; - u32 flags; - __be32 max_mws; - u32 pbe_range_low; - u32 pbe_range_high; - u32 max_srqs; - u32 page_size; -} __attribute__((packed)); - -union c2wr_rnic_query { - struct c2wr_rnic_query_req req; - struct c2wr_rnic_query_rep rep; -} __attribute__((packed)); - -/* - * WR_RNIC_GETCONFIG - */ - -struct c2wr_rnic_getconfig_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 option; /* see c2_getconfig_cmd_t */ - u64 reply_buf; - u32 reply_buf_len; -} __attribute__((packed)) ; - -struct c2wr_rnic_getconfig_rep { - struct c2wr_hdr hdr; - u32 option; /* see c2_getconfig_cmd_t */ - u32 count_len; /* length of the number of addresses configured */ -} __attribute__((packed)) ; - -union c2wr_rnic_getconfig { - struct c2wr_rnic_getconfig_req req; - struct c2wr_rnic_getconfig_rep rep; -} __attribute__((packed)) ; - -/* - * WR_RNIC_SETCONFIG - */ -struct c2wr_rnic_setconfig_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - __be32 option; /* See c2_setconfig_cmd_t */ - /* variable data and pad. See c2_netaddr and c2_route */ - u8 data[0]; -} __attribute__((packed)) ; - -struct c2wr_rnic_setconfig_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_rnic_setconfig { - struct c2wr_rnic_setconfig_req req; - struct c2wr_rnic_setconfig_rep rep; -} __attribute__((packed)) ; - -/* - * WR_RNIC_CLOSE - */ -struct c2wr_rnic_close_req { - struct c2wr_hdr hdr; - u32 rnic_handle; -} __attribute__((packed)) ; - -struct c2wr_rnic_close_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_rnic_close { - struct c2wr_rnic_close_req req; - struct c2wr_rnic_close_rep rep; -} __attribute__((packed)) ; - -/* - *------------------------ CQ ------------------------ - */ -struct c2wr_cq_create_req { - struct c2wr_hdr hdr; - __be64 shared_ht; - u64 user_context; - __be64 msg_pool; - u32 rnic_handle; - __be32 msg_size; - __be32 depth; -} __attribute__((packed)) ; - -struct c2wr_cq_create_rep { - struct c2wr_hdr hdr; - __be32 mq_index; - __be32 adapter_shared; - u32 cq_handle; -} __attribute__((packed)) ; - -union c2wr_cq_create { - struct c2wr_cq_create_req req; - struct c2wr_cq_create_rep rep; -} __attribute__((packed)) ; - -struct c2wr_cq_modify_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 cq_handle; - u32 new_depth; - u64 new_msg_pool; -} __attribute__((packed)) ; - -struct c2wr_cq_modify_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_cq_modify { - struct c2wr_cq_modify_req req; - struct c2wr_cq_modify_rep rep; -} __attribute__((packed)) ; - -struct c2wr_cq_destroy_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 cq_handle; -} __attribute__((packed)) ; - -struct c2wr_cq_destroy_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_cq_destroy { - struct c2wr_cq_destroy_req req; - struct c2wr_cq_destroy_rep rep; -} __attribute__((packed)) ; - -/* - *------------------------ PD ------------------------ - */ -struct c2wr_pd_alloc_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 pd_id; -} __attribute__((packed)) ; - -struct c2wr_pd_alloc_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_pd_alloc { - struct c2wr_pd_alloc_req req; - struct c2wr_pd_alloc_rep rep; -} __attribute__((packed)) ; - -struct c2wr_pd_dealloc_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 pd_id; -} __attribute__((packed)) ; - -struct c2wr_pd_dealloc_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_pd_dealloc { - struct c2wr_pd_dealloc_req req; - struct c2wr_pd_dealloc_rep rep; -} __attribute__((packed)) ; - -/* - *------------------------ SRQ ------------------------ - */ -struct c2wr_srq_create_req { - struct c2wr_hdr hdr; - u64 shared_ht; - u64 user_context; - u32 rnic_handle; - u32 srq_depth; - u32 srq_limit; - u32 sgl_depth; - u32 pd_id; -} __attribute__((packed)) ; - -struct c2wr_srq_create_rep { - struct c2wr_hdr hdr; - u32 srq_depth; - u32 sgl_depth; - u32 msg_size; - u32 mq_index; - u32 mq_start; - u32 srq_handle; -} __attribute__((packed)) ; - -union c2wr_srq_create { - struct c2wr_srq_create_req req; - struct c2wr_srq_create_rep rep; -} __attribute__((packed)) ; - -struct c2wr_srq_destroy_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 srq_handle; -} __attribute__((packed)) ; - -struct c2wr_srq_destroy_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_srq_destroy { - struct c2wr_srq_destroy_req req; - struct c2wr_srq_destroy_rep rep; -} __attribute__((packed)) ; - -/* - *------------------------ QP ------------------------ - */ -enum c2wr_qp_flags { - QP_RDMA_READ = 0x00000001, /* RDMA read enabled? */ - QP_RDMA_WRITE = 0x00000002, /* RDMA write enabled? */ - QP_MW_BIND = 0x00000004, /* MWs enabled */ - QP_ZERO_STAG = 0x00000008, /* enabled? */ - QP_REMOTE_TERMINATION = 0x00000010, /* remote end terminated */ - QP_RDMA_READ_RESPONSE = 0x00000020 /* Remote RDMA read */ - /* enabled? */ -}; - -struct c2wr_qp_create_req { - struct c2wr_hdr hdr; - __be64 shared_sq_ht; - __be64 shared_rq_ht; - u64 user_context; - u32 rnic_handle; - u32 sq_cq_handle; - u32 rq_cq_handle; - __be32 sq_depth; - __be32 rq_depth; - u32 srq_handle; - u32 srq_limit; - __be32 flags; /* see enum c2wr_qp_flags */ - __be32 send_sgl_depth; - __be32 recv_sgl_depth; - __be32 rdma_write_sgl_depth; - __be32 ord; - __be32 ird; - u32 pd_id; -} __attribute__((packed)) ; - -struct c2wr_qp_create_rep { - struct c2wr_hdr hdr; - __be32 sq_depth; - __be32 rq_depth; - u32 send_sgl_depth; - u32 recv_sgl_depth; - u32 rdma_write_sgl_depth; - u32 ord; - u32 ird; - __be32 sq_msg_size; - __be32 sq_mq_index; - __be32 sq_mq_start; - __be32 rq_msg_size; - __be32 rq_mq_index; - __be32 rq_mq_start; - u32 qp_handle; -} __attribute__((packed)) ; - -union c2wr_qp_create { - struct c2wr_qp_create_req req; - struct c2wr_qp_create_rep rep; -} __attribute__((packed)) ; - -struct c2wr_qp_query_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 qp_handle; -} __attribute__((packed)) ; - -struct c2wr_qp_query_rep { - struct c2wr_hdr hdr; - u64 user_context; - u32 rnic_handle; - u32 sq_depth; - u32 rq_depth; - u32 send_sgl_depth; - u32 rdma_write_sgl_depth; - u32 recv_sgl_depth; - u32 ord; - u32 ird; - u16 qp_state; - u16 flags; /* see c2wr_qp_flags_t */ - u32 qp_id; - u32 local_addr; - u32 remote_addr; - u16 local_port; - u16 remote_port; - u32 terminate_msg_length; /* 0 if not present */ - u8 data[0]; - /* Terminate Message in-line here. */ -} __attribute__((packed)) ; - -union c2wr_qp_query { - struct c2wr_qp_query_req req; - struct c2wr_qp_query_rep rep; -} __attribute__((packed)) ; - -struct c2wr_qp_modify_req { - struct c2wr_hdr hdr; - u64 stream_msg; - u32 stream_msg_length; - u32 rnic_handle; - u32 qp_handle; - __be32 next_qp_state; - __be32 ord; - __be32 ird; - __be32 sq_depth; - __be32 rq_depth; - u32 llp_ep_handle; -} __attribute__((packed)) ; - -struct c2wr_qp_modify_rep { - struct c2wr_hdr hdr; - u32 ord; - u32 ird; - u32 sq_depth; - u32 rq_depth; - u32 sq_msg_size; - u32 sq_mq_index; - u32 sq_mq_start; - u32 rq_msg_size; - u32 rq_mq_index; - u32 rq_mq_start; -} __attribute__((packed)) ; - -union c2wr_qp_modify { - struct c2wr_qp_modify_req req; - struct c2wr_qp_modify_rep rep; -} __attribute__((packed)) ; - -struct c2wr_qp_destroy_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 qp_handle; -} __attribute__((packed)) ; - -struct c2wr_qp_destroy_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_qp_destroy { - struct c2wr_qp_destroy_req req; - struct c2wr_qp_destroy_rep rep; -} __attribute__((packed)) ; - -/* - * The CCWR_QP_CONNECT msg is posted on the verbs request queue. It can - * only be posted when a QP is in IDLE state. After the connect request is - * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state. - * No synchronous reply from adapter to this WR. The results of - * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS - * See c2wr_ae_active_connect_results_t - */ -struct c2wr_qp_connect_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 qp_handle; - __be32 remote_addr; - __be16 remote_port; - u16 pad; - __be32 private_data_length; - u8 private_data[0]; /* Private data in-line. */ -} __attribute__((packed)) ; - -struct c2wr_qp_connect { - struct c2wr_qp_connect_req req; - /* no synchronous reply. */ -} __attribute__((packed)) ; - - -/* - *------------------------ MM ------------------------ - */ - -struct c2wr_nsmr_stag_alloc_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 pbl_depth; - u32 pd_id; - u32 flags; -} __attribute__((packed)) ; - -struct c2wr_nsmr_stag_alloc_rep { - struct c2wr_hdr hdr; - u32 pbl_depth; - u32 stag_index; -} __attribute__((packed)) ; - -union c2wr_nsmr_stag_alloc { - struct c2wr_nsmr_stag_alloc_req req; - struct c2wr_nsmr_stag_alloc_rep rep; -} __attribute__((packed)) ; - -struct c2wr_nsmr_register_req { - struct c2wr_hdr hdr; - __be64 va; - u32 rnic_handle; - __be16 flags; - u8 stag_key; - u8 pad; - u32 pd_id; - __be32 pbl_depth; - __be32 pbe_size; - __be32 fbo; - __be32 length; - __be32 addrs_length; - /* array of paddrs (must be aligned on a 64bit boundary) */ - __be64 paddrs[0]; -} __attribute__((packed)) ; - -struct c2wr_nsmr_register_rep { - struct c2wr_hdr hdr; - u32 pbl_depth; - __be32 stag_index; -} __attribute__((packed)) ; - -union c2wr_nsmr_register { - struct c2wr_nsmr_register_req req; - struct c2wr_nsmr_register_rep rep; -} __attribute__((packed)) ; - -struct c2wr_nsmr_pbl_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - __be32 flags; - __be32 stag_index; - __be32 addrs_length; - /* array of paddrs (must be aligned on a 64bit boundary) */ - __be64 paddrs[0]; -} __attribute__((packed)) ; - -struct c2wr_nsmr_pbl_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_nsmr_pbl { - struct c2wr_nsmr_pbl_req req; - struct c2wr_nsmr_pbl_rep rep; -} __attribute__((packed)) ; - -struct c2wr_mr_query_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 stag_index; -} __attribute__((packed)) ; - -struct c2wr_mr_query_rep { - struct c2wr_hdr hdr; - u8 stag_key; - u8 pad[3]; - u32 pd_id; - u32 flags; - u32 pbl_depth; -} __attribute__((packed)) ; - -union c2wr_mr_query { - struct c2wr_mr_query_req req; - struct c2wr_mr_query_rep rep; -} __attribute__((packed)) ; - -struct c2wr_mw_query_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 stag_index; -} __attribute__((packed)) ; - -struct c2wr_mw_query_rep { - struct c2wr_hdr hdr; - u8 stag_key; - u8 pad[3]; - u32 pd_id; - u32 flags; -} __attribute__((packed)) ; - -union c2wr_mw_query { - struct c2wr_mw_query_req req; - struct c2wr_mw_query_rep rep; -} __attribute__((packed)) ; - - -struct c2wr_stag_dealloc_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - __be32 stag_index; -} __attribute__((packed)) ; - -struct c2wr_stag_dealloc_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)) ; - -union c2wr_stag_dealloc { - struct c2wr_stag_dealloc_req req; - struct c2wr_stag_dealloc_rep rep; -} __attribute__((packed)) ; - -struct c2wr_nsmr_reregister_req { - struct c2wr_hdr hdr; - u64 va; - u32 rnic_handle; - u16 flags; - u8 stag_key; - u8 pad; - u32 stag_index; - u32 pd_id; - u32 pbl_depth; - u32 pbe_size; - u32 fbo; - u32 length; - u32 addrs_length; - u32 pad1; - /* array of paddrs (must be aligned on a 64bit boundary) */ - u64 paddrs[0]; -} __attribute__((packed)) ; - -struct c2wr_nsmr_reregister_rep { - struct c2wr_hdr hdr; - u32 pbl_depth; - u32 stag_index; -} __attribute__((packed)) ; - -union c2wr_nsmr_reregister { - struct c2wr_nsmr_reregister_req req; - struct c2wr_nsmr_reregister_rep rep; -} __attribute__((packed)) ; - -struct c2wr_smr_register_req { - struct c2wr_hdr hdr; - u64 va; - u32 rnic_handle; - u16 flags; - u8 stag_key; - u8 pad; - u32 stag_index; - u32 pd_id; -} __attribute__((packed)) ; - -struct c2wr_smr_register_rep { - struct c2wr_hdr hdr; - u32 stag_index; -} __attribute__((packed)) ; - -union c2wr_smr_register { - struct c2wr_smr_register_req req; - struct c2wr_smr_register_rep rep; -} __attribute__((packed)) ; - -struct c2wr_mw_alloc_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 pd_id; -} __attribute__((packed)) ; - -struct c2wr_mw_alloc_rep { - struct c2wr_hdr hdr; - u32 stag_index; -} __attribute__((packed)) ; - -union c2wr_mw_alloc { - struct c2wr_mw_alloc_req req; - struct c2wr_mw_alloc_rep rep; -} __attribute__((packed)) ; - -/* - *------------------------ WRs ----------------------- - */ - -struct c2wr_user_hdr { - struct c2wr_hdr hdr; /* Has status and WR Type */ -} __attribute__((packed)) ; - -enum c2_qp_state { - C2_QP_STATE_IDLE = 0x01, - C2_QP_STATE_CONNECTING = 0x02, - C2_QP_STATE_RTS = 0x04, - C2_QP_STATE_CLOSING = 0x08, - C2_QP_STATE_TERMINATE = 0x10, - C2_QP_STATE_ERROR = 0x20, -}; - -/* Completion queue entry. */ -struct c2wr_ce { - struct c2wr_hdr hdr; /* Has status and WR Type */ - u64 qp_user_context; /* c2_user_qp_t * */ - u32 qp_state; /* Current QP State */ - u32 handle; /* QPID or EP Handle */ - __be32 bytes_rcvd; /* valid for RECV WCs */ - u32 stag; -} __attribute__((packed)) ; - - -/* - * Flags used for all post-sq WRs. These must fit in the flags - * field of the struct c2wr_hdr (eight bits). - */ -enum { - SQ_SIGNALED = 0x01, - SQ_READ_FENCE = 0x02, - SQ_FENCE = 0x04, -}; - -/* - * Common fields for all post-sq WRs. Namely the standard header and a - * secondary header with fields common to all post-sq WRs. - */ -struct c2_sq_hdr { - struct c2wr_user_hdr user_hdr; -} __attribute__((packed)); - -/* - * Same as above but for post-rq WRs. - */ -struct c2_rq_hdr { - struct c2wr_user_hdr user_hdr; -} __attribute__((packed)); - -/* - * use the same struct for all sends. - */ -struct c2wr_send_req { - struct c2_sq_hdr sq_hdr; - __be32 sge_len; - __be32 remote_stag; - u8 data[0]; /* SGE array */ -} __attribute__((packed)); - -union c2wr_send { - struct c2wr_send_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -struct c2wr_rdma_write_req { - struct c2_sq_hdr sq_hdr; - __be64 remote_to; - __be32 remote_stag; - __be32 sge_len; - u8 data[0]; /* SGE array */ -} __attribute__((packed)); - -union c2wr_rdma_write { - struct c2wr_rdma_write_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -struct c2wr_rdma_read_req { - struct c2_sq_hdr sq_hdr; - __be64 local_to; - __be64 remote_to; - __be32 local_stag; - __be32 remote_stag; - __be32 length; -} __attribute__((packed)); - -union c2wr_rdma_read { - struct c2wr_rdma_read_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -struct c2wr_mw_bind_req { - struct c2_sq_hdr sq_hdr; - u64 va; - u8 stag_key; - u8 pad[3]; - u32 mw_stag_index; - u32 mr_stag_index; - u32 length; - u32 flags; -} __attribute__((packed)); - -union c2wr_mw_bind { - struct c2wr_mw_bind_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -struct c2wr_nsmr_fastreg_req { - struct c2_sq_hdr sq_hdr; - u64 va; - u8 stag_key; - u8 pad[3]; - u32 stag_index; - u32 pbe_size; - u32 fbo; - u32 length; - u32 addrs_length; - /* array of paddrs (must be aligned on a 64bit boundary) */ - u64 paddrs[0]; -} __attribute__((packed)); - -union c2wr_nsmr_fastreg { - struct c2wr_nsmr_fastreg_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -struct c2wr_stag_invalidate_req { - struct c2_sq_hdr sq_hdr; - u8 stag_key; - u8 pad[3]; - u32 stag_index; -} __attribute__((packed)); - -union c2wr_stag_invalidate { - struct c2wr_stag_invalidate_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -union c2wr_sqwr { - struct c2_sq_hdr sq_hdr; - struct c2wr_send_req send; - struct c2wr_send_req send_se; - struct c2wr_send_req send_inv; - struct c2wr_send_req send_se_inv; - struct c2wr_rdma_write_req rdma_write; - struct c2wr_rdma_read_req rdma_read; - struct c2wr_mw_bind_req mw_bind; - struct c2wr_nsmr_fastreg_req nsmr_fastreg; - struct c2wr_stag_invalidate_req stag_inv; -} __attribute__((packed)); - - -/* - * RQ WRs - */ -struct c2wr_rqwr { - struct c2_rq_hdr rq_hdr; - u8 data[0]; /* array of SGEs */ -} __attribute__((packed)); - -union c2wr_recv { - struct c2wr_rqwr req; - struct c2wr_ce rep; -} __attribute__((packed)); - -/* - * All AEs start with this header. Most AEs only need to convey the - * information in the header. Some, like LLP connection events, need - * more info. The union typdef c2wr_ae_t has all the possible AEs. - * - * hdr.context is the user_context from the rnic_open WR. NULL If this - * is not affiliated with an rnic - * - * hdr.id is the AE identifier (eg; CCAE_REMOTE_SHUTDOWN, - * CCAE_LLP_CLOSE_COMPLETE) - * - * resource_type is one of: C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ - * - * user_context is the context passed down when the host created the resource. - */ -struct c2wr_ae_hdr { - struct c2wr_hdr hdr; - u64 user_context; /* user context for this res. */ - __be32 resource_type; /* see enum c2_resource_indicator */ - __be32 resource; /* handle for resource */ - __be32 qp_state; /* current QP State */ -} __attribute__((packed)); - -/* - * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ, - * the adapter moves the QP into RTS state - */ -struct c2wr_ae_active_connect_results { - struct c2wr_ae_hdr ae_hdr; - __be32 laddr; - __be32 raddr; - __be16 lport; - __be16 rport; - __be32 private_data_length; - u8 private_data[0]; /* data is in-line in the msg. */ -} __attribute__((packed)); - -/* - * When connections are established by the stack (and the private data - * MPA frame is received), the adapter will generate an event to the host. - * The details of the connection, any private data, and the new connection - * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the - * AE queue: - */ -struct c2wr_ae_connection_request { - struct c2wr_ae_hdr ae_hdr; - u32 cr_handle; /* connreq handle (sock ptr) */ - __be32 laddr; - __be32 raddr; - __be16 lport; - __be16 rport; - __be32 private_data_length; - u8 private_data[0]; /* data is in-line in the msg. */ -} __attribute__((packed)); - -union c2wr_ae { - struct c2wr_ae_hdr ae_generic; - struct c2wr_ae_active_connect_results ae_active_connect_results; - struct c2wr_ae_connection_request ae_connection_request; -} __attribute__((packed)); - -struct c2wr_init_req { - struct c2wr_hdr hdr; - __be64 hint_count; - __be64 q0_host_shared; - __be64 q1_host_shared; - __be64 q1_host_msg_pool; - __be64 q2_host_shared; - __be64 q2_host_msg_pool; -} __attribute__((packed)); - -struct c2wr_init_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)); - -union c2wr_init { - struct c2wr_init_req req; - struct c2wr_init_rep rep; -} __attribute__((packed)); - -/* - * For upgrading flash. - */ - -struct c2wr_flash_init_req { - struct c2wr_hdr hdr; - u32 rnic_handle; -} __attribute__((packed)); - -struct c2wr_flash_init_rep { - struct c2wr_hdr hdr; - u32 adapter_flash_buf_offset; - u32 adapter_flash_len; -} __attribute__((packed)); - -union c2wr_flash_init { - struct c2wr_flash_init_req req; - struct c2wr_flash_init_rep rep; -} __attribute__((packed)); - -struct c2wr_flash_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 len; -} __attribute__((packed)); - -struct c2wr_flash_rep { - struct c2wr_hdr hdr; - u32 status; -} __attribute__((packed)); - -union c2wr_flash { - struct c2wr_flash_req req; - struct c2wr_flash_rep rep; -} __attribute__((packed)); - -struct c2wr_buf_alloc_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 size; -} __attribute__((packed)); - -struct c2wr_buf_alloc_rep { - struct c2wr_hdr hdr; - u32 offset; /* 0 if mem not available */ - u32 size; /* 0 if mem not available */ -} __attribute__((packed)); - -union c2wr_buf_alloc { - struct c2wr_buf_alloc_req req; - struct c2wr_buf_alloc_rep rep; -} __attribute__((packed)); - -struct c2wr_buf_free_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 offset; /* Must match value from alloc */ - u32 size; /* Must match value from alloc */ -} __attribute__((packed)); - -struct c2wr_buf_free_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)); - -union c2wr_buf_free { - struct c2wr_buf_free_req req; - struct c2wr_ce rep; -} __attribute__((packed)); - -struct c2wr_flash_write_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 offset; - u32 size; - u32 type; - u32 flags; -} __attribute__((packed)); - -struct c2wr_flash_write_rep { - struct c2wr_hdr hdr; - u32 status; -} __attribute__((packed)); - -union c2wr_flash_write { - struct c2wr_flash_write_req req; - struct c2wr_flash_write_rep rep; -} __attribute__((packed)); - -/* - * Messages for LLP connection setup. - */ - -/* - * Listen Request. This allocates a listening endpoint to allow passive - * connection setup. Newly established LLP connections are passed up - * via an AE. See c2wr_ae_connection_request_t - */ -struct c2wr_ep_listen_create_req { - struct c2wr_hdr hdr; - u64 user_context; /* returned in AEs. */ - u32 rnic_handle; - __be32 local_addr; /* local addr, or 0 */ - __be16 local_port; /* 0 means "pick one" */ - u16 pad; - __be32 backlog; /* tradional tcp listen bl */ -} __attribute__((packed)); - -struct c2wr_ep_listen_create_rep { - struct c2wr_hdr hdr; - u32 ep_handle; /* handle to new listening ep */ - u16 local_port; /* resulting port... */ - u16 pad; -} __attribute__((packed)); - -union c2wr_ep_listen_create { - struct c2wr_ep_listen_create_req req; - struct c2wr_ep_listen_create_rep rep; -} __attribute__((packed)); - -struct c2wr_ep_listen_destroy_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 ep_handle; -} __attribute__((packed)); - -struct c2wr_ep_listen_destroy_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)); - -union c2wr_ep_listen_destroy { - struct c2wr_ep_listen_destroy_req req; - struct c2wr_ep_listen_destroy_rep rep; -} __attribute__((packed)); - -struct c2wr_ep_query_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 ep_handle; -} __attribute__((packed)); - -struct c2wr_ep_query_rep { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 local_addr; - u32 remote_addr; - u16 local_port; - u16 remote_port; -} __attribute__((packed)); - -union c2wr_ep_query { - struct c2wr_ep_query_req req; - struct c2wr_ep_query_rep rep; -} __attribute__((packed)); - - -/* - * The host passes this down to indicate acceptance of a pending iWARP - * connection. The cr_handle was obtained from the CONNECTION_REQUEST - * AE passed up by the adapter. See c2wr_ae_connection_request_t. - */ -struct c2wr_cr_accept_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 qp_handle; /* QP to bind to this LLP conn */ - u32 ep_handle; /* LLP handle to accept */ - __be32 private_data_length; - u8 private_data[0]; /* data in-line in msg. */ -} __attribute__((packed)); - -/* - * adapter sends reply when private data is successfully submitted to - * the LLP. - */ -struct c2wr_cr_accept_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)); - -union c2wr_cr_accept { - struct c2wr_cr_accept_req req; - struct c2wr_cr_accept_rep rep; -} __attribute__((packed)); - -/* - * The host sends this down if a given iWARP connection request was - * rejected by the consumer. The cr_handle was obtained from a - * previous c2wr_ae_connection_request_t AE sent by the adapter. - */ -struct c2wr_cr_reject_req { - struct c2wr_hdr hdr; - u32 rnic_handle; - u32 ep_handle; /* LLP handle to reject */ -} __attribute__((packed)); - -/* - * Dunno if this is needed, but we'll add it for now. The adapter will - * send the reject_reply after the LLP endpoint has been destroyed. - */ -struct c2wr_cr_reject_rep { - struct c2wr_hdr hdr; -} __attribute__((packed)); - -union c2wr_cr_reject { - struct c2wr_cr_reject_req req; - struct c2wr_cr_reject_rep rep; -} __attribute__((packed)); - -/* - * console command. Used to implement a debug console over the verbs - * request and reply queues. - */ - -/* - * Console request message. It contains: - * - message hdr with id = CCWR_CONSOLE - * - the physaddr/len of host memory to be used for the reply. - * - the command string. eg: "netstat -s" or "zoneinfo" - */ -struct c2wr_console_req { - struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ - u64 reply_buf; /* pinned host buf for reply */ - u32 reply_buf_len; /* length of reply buffer */ - u8 command[0]; /* NUL terminated ascii string */ - /* containing the command req */ -} __attribute__((packed)); - -/* - * flags used in the console reply. - */ -enum c2_console_flags { - CONS_REPLY_TRUNCATED = 0x00000001 /* reply was truncated */ -} __attribute__((packed)); - -/* - * Console reply message. - * hdr.result contains the c2_status_t error if the reply was _not_ generated, - * or C2_OK if the reply was generated. - */ -struct c2wr_console_rep { - struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ - u32 flags; -} __attribute__((packed)); - -union c2wr_console { - struct c2wr_console_req req; - struct c2wr_console_rep rep; -} __attribute__((packed)); - - -/* - * Giant union with all WRs. Makes life easier... - */ -union c2wr { - struct c2wr_hdr hdr; - struct c2wr_user_hdr user_hdr; - union c2wr_rnic_open rnic_open; - union c2wr_rnic_query rnic_query; - union c2wr_rnic_getconfig rnic_getconfig; - union c2wr_rnic_setconfig rnic_setconfig; - union c2wr_rnic_close rnic_close; - union c2wr_cq_create cq_create; - union c2wr_cq_modify cq_modify; - union c2wr_cq_destroy cq_destroy; - union c2wr_pd_alloc pd_alloc; - union c2wr_pd_dealloc pd_dealloc; - union c2wr_srq_create srq_create; - union c2wr_srq_destroy srq_destroy; - union c2wr_qp_create qp_create; - union c2wr_qp_query qp_query; - union c2wr_qp_modify qp_modify; - union c2wr_qp_destroy qp_destroy; - struct c2wr_qp_connect qp_connect; - union c2wr_nsmr_stag_alloc nsmr_stag_alloc; - union c2wr_nsmr_register nsmr_register; - union c2wr_nsmr_pbl nsmr_pbl; - union c2wr_mr_query mr_query; - union c2wr_mw_query mw_query; - union c2wr_stag_dealloc stag_dealloc; - union c2wr_sqwr sqwr; - struct c2wr_rqwr rqwr; - struct c2wr_ce ce; - union c2wr_ae ae; - union c2wr_init init; - union c2wr_ep_listen_create ep_listen_create; - union c2wr_ep_listen_destroy ep_listen_destroy; - union c2wr_cr_accept cr_accept; - union c2wr_cr_reject cr_reject; - union c2wr_console console; - union c2wr_flash_init flash_init; - union c2wr_flash flash; - union c2wr_buf_alloc buf_alloc; - union c2wr_buf_free buf_free; - union c2wr_flash_write flash_write; -} __attribute__((packed)); - - -/* - * Accessors for the wr fields that are packed together tightly to - * reduce the wr message size. The wr arguments are void* so that - * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types - * in the struct c2wr union can be passed in. - */ -static __inline__ u8 c2_wr_get_id(void *wr) -{ - return ((struct c2wr_hdr *) wr)->id; -} -static __inline__ void c2_wr_set_id(void *wr, u8 id) -{ - ((struct c2wr_hdr *) wr)->id = id; -} -static __inline__ u8 c2_wr_get_result(void *wr) -{ - return ((struct c2wr_hdr *) wr)->result; -} -static __inline__ void c2_wr_set_result(void *wr, u8 result) -{ - ((struct c2wr_hdr *) wr)->result = result; -} -static __inline__ u8 c2_wr_get_flags(void *wr) -{ - return ((struct c2wr_hdr *) wr)->flags; -} -static __inline__ void c2_wr_set_flags(void *wr, u8 flags) -{ - ((struct c2wr_hdr *) wr)->flags = flags; -} -static __inline__ u8 c2_wr_get_sge_count(void *wr) -{ - return ((struct c2wr_hdr *) wr)->sge_count; -} -static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count) -{ - ((struct c2wr_hdr *) wr)->sge_count = sge_count; -} -static __inline__ __be32 c2_wr_get_wqe_count(void *wr) -{ - return ((struct c2wr_hdr *) wr)->wqe_count; -} -static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count) -{ - ((struct c2wr_hdr *) wr)->wqe_count = wqe_count; -} - -#endif /* _C2_WR_H_ */ diff --git a/kernel/drivers/infiniband/hw/cxgb3/iwch_cm.c b/kernel/drivers/infiniband/hw/cxgb3/iwch_cm.c index cb78b1e9b..f504ba73e 100644 --- a/kernel/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/kernel/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en error = l2t_send(tdev, skb, l2e); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) @@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) error = cxgb3_ofld_send(tdev, skb); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) diff --git a/kernel/drivers/infiniband/hw/cxgb3/iwch_cq.c b/kernel/drivers/infiniband/hw/cxgb3/iwch_cq.c index cf5474ae6..cfe404925 100644 --- a/kernel/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/kernel/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -123,7 +123,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, wc->opcode = IB_WC_LOCAL_INV; break; case T3_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.c b/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.c index 811b24a53..c34725ca0 100644 --- a/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -85,9 +85,13 @@ static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int iwch_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { return -ENOSYS; } @@ -138,10 +142,12 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) return 0; } -static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector, - struct ib_ucontext *ib_context, - struct ib_udata *udata) +static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata) { + int entries = attr->cqe; struct iwch_dev *rhp; struct iwch_cq *chp; struct iwch_create_cq_resp uresp; @@ -151,6 +157,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve size_t resplen; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); + rhp = to_iwch_dev(ibdev); chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) @@ -454,6 +463,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) return -EINVAL; mhp = to_iwch_mr(ib_mr); + kfree(mhp->pages); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, @@ -727,6 +737,10 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) /* * T3 only supports 32 bits of size. */ + if (sizeof(phys_addr_t) > 4) { + pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n"); + return ERR_PTR(-ENOTSUPP); + } bl.size = 0xffffffff; bl.addr = 0; kva = 0; @@ -787,7 +801,9 @@ static int iwch_dealloc_mw(struct ib_mw *mw) return 0; } -static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -796,17 +812,27 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) u32 stag = 0; int ret = 0; + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > T3_MAX_FASTREG_DEPTH) + return ERR_PTR(-EINVAL); + php = to_iwch_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) goto err; + mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mhp->pages) { + ret = -ENOMEM; + goto pl_err; + } + mhp->rhp = rhp; - ret = iwch_alloc_pbl(mhp, pbl_depth); + ret = iwch_alloc_pbl(mhp, max_num_sg); if (ret) goto err1; - mhp->attr.pbl_size = pbl_depth; + mhp->attr.pbl_size = max_num_sg; ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (ret) @@ -828,31 +854,34 @@ err3: err2: iwch_free_pbl(mhp); err1: + kfree(mhp->pages); +pl_err: kfree(mhp); err: return ERR_PTR(ret); } -static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len) +static int iwch_set_page(struct ib_mr *ibmr, u64 addr) { - struct ib_fast_reg_page_list *page_list; + struct iwch_mr *mhp = to_iwch_mr(ibmr); - page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), - GFP_KERNEL); - if (!page_list) - return ERR_PTR(-ENOMEM); + if (unlikely(mhp->npages == mhp->attr.pbl_size)) + return -ENOMEM; - page_list->page_list = (u64 *)(page_list + 1); - page_list->max_page_list_len = page_list_len; + mhp->pages[mhp->npages++] = addr; - return page_list; + return 0; } -static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +static int iwch_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - kfree(page_list); + struct iwch_mr *mhp = to_iwch_mr(ibmr); + + mhp->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); } static int iwch_destroy_qp(struct ib_qp *ib_qp) @@ -1145,13 +1174,17 @@ static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) (fw_mic & 0xffff); } -static int iwch_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct iwch_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); @@ -1343,6 +1376,23 @@ static struct device_attribute *iwch_class_attributes[] = { &dev_attr_board_id, }; +static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = iwch_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1409,9 +1459,8 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.alloc_mw = iwch_alloc_mw; dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; - dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; - dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; - dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; + dev->ibdev.alloc_mr = iwch_alloc_mr; + dev->ibdev.map_mr_sg = iwch_map_mr_sg; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; @@ -1420,6 +1469,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.get_protocol_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; + dev->ibdev.get_port_immutable = iwch_port_immutable; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.h b/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.h index 87c14b0c5..2ac85b86a 100644 --- a/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/kernel/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -77,6 +77,8 @@ struct iwch_mr { struct iwch_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *pages; + u32 npages; }; typedef struct iwch_mw iwch_mw_handle; diff --git a/kernel/drivers/infiniband/hw/cxgb3/iwch_qp.c b/kernel/drivers/infiniband/hw/cxgb3/iwch_qp.c index b57c0befd..d0548fc63 100644 --- a/kernel/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/kernel/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, wqe->write.reserved[0] = 0; wqe->write.reserved[1] = 0; wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; @@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); @@ -146,27 +146,28 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { + struct iwch_mr *mhp = to_iwch_mr(wr->mr); int i; __be64 *p; - if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + if (mhp->npages > T3_MAX_FASTREG_DEPTH) return -EINVAL; *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.stag = cpu_to_be32(wr->key); + wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); + wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + cpu_to_be32(mhp->ibmr.iova & 0xffffffff); wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | - V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_PAGE_COUNT(mhp->npages) | + V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + for (i = 0; i < mhp->npages; i++, p++) { /* If we need a 2nd WR, then set it up */ if (i == T3_MAX_FASTREG_FRAG) { @@ -175,14 +176,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, Q_PTR2IDX((wq->wptr+1), wq->size_log2)); build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, T3_EOP); p = &wqe->pbl_frag.pbl_addrs[0]; } - *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + *p = cpu_to_be64((u64)mhp->pages[i]); } - *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + *flit_cnt = 5 + mhp->npages; if (*flit_cnt > 15) *flit_cnt = 15; return 0; @@ -414,10 +415,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: t3_wr_opcode = T3_WR_FASTREG; - err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); + err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) diff --git a/kernel/drivers/infiniband/hw/cxgb4/cm.c b/kernel/drivers/infiniband/hw/cxgb4/cm.c index 3ad8dc798..c9cffced0 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/cm.c +++ b/kernel/drivers/infiniband/hw/cxgb4/cm.c @@ -50,6 +50,7 @@ #include <rdma/ib_addr.h> #include "iw_cxgb4.h" +#include "clip_tbl.h" static char *states[] = { "idle", @@ -115,11 +116,11 @@ module_param(ep_timeout_secs, int, 0644); MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " "in seconds (default=60)"); -static int mpa_rev = 1; +static int mpa_rev = 2; module_param(mpa_rev, int, 0644); MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" - " compliant (default=1)"); + " compliant (default=2)"); static int markers_enabled; module_param(markers_enabled, int, 0644); @@ -298,6 +299,16 @@ void _c4iw_free_ep(struct kref *kref) if (test_bit(QP_REFERENCED, &ep->com.flags)) deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + + cxgb4_clip_release( + ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, + 1); + } remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); @@ -442,6 +453,12 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) kfree_skb(skb); connect_reply_upcall(ep, -EHOSTUNREACH); state_set(&ep->com, DEAD); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); dst_release(ep->dst); @@ -615,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu, static int send_connect(struct c4iw_ep *ep) { - struct cpl_act_open_req *req; - struct cpl_t5_act_open_req *t5_req; - struct cpl_act_open_req6 *req6; - struct cpl_t5_act_open_req6 *t5_req6; + struct cpl_act_open_req *req = NULL; + struct cpl_t5_act_open_req *t5req = NULL; + struct cpl_t6_act_open_req *t6req = NULL; + struct cpl_act_open_req6 *req6 = NULL; + struct cpl_t5_act_open_req6 *t5req6 = NULL; + struct cpl_t6_act_open_req6 *t6req6 = NULL; struct sk_buff *skb; u64 opt0; u32 opt2; unsigned int mtu_idx; int wscale; - int wrlen; - int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? - sizeof(struct cpl_act_open_req) : - sizeof(struct cpl_t5_act_open_req); - int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? - sizeof(struct cpl_act_open_req6) : - sizeof(struct cpl_t5_act_open_req6); + int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.mapped_local_addr; struct sockaddr_in *ra = (struct sockaddr_in *) @@ -639,7 +652,28 @@ static int send_connect(struct c4iw_ep *ep) &ep->com.mapped_local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; - int win; + int ret; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; + u32 isn = (prandom_u32() & ~7UL) - 1; + + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + sizev4 = sizeof(struct cpl_act_open_req); + sizev6 = sizeof(struct cpl_act_open_req6); + break; + case CHELSIO_T5: + sizev4 = sizeof(struct cpl_t5_act_open_req); + sizev6 = sizeof(struct cpl_t5_act_open_req6); + break; + case CHELSIO_T6: + sizev4 = sizeof(struct cpl_t6_act_open_req); + sizev6 = sizeof(struct cpl_t6_act_open_req6); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + return -EINVAL; + } wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? roundup(sizev4, 16) : @@ -688,109 +722,128 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN_F; if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN_F; - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { + if (peer2peer) + isn += 4; + opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); opt2 |= T5_ISS_F; } + + if (ep->com.remote_addr.ss_family == AF_INET6) + cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&la6->sin6_addr.s6_addr, 1); + t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); - if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { - if (ep->com.remote_addr.ss_family == AF_INET) { - req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + if (ep->com.remote_addr.ss_family == AF_INET) { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + req = (struct cpl_act_open_req *)skb_put(skb, wrlen); INIT_TP_WR(req, 0); - OPCODE_TID(req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, - ((ep->rss_qid << 14) | ep->atid))); - req->local_port = la->sin_port; - req->peer_port = ra->sin_port; - req->local_ip = la->sin_addr.s_addr; - req->peer_ip = ra->sin_addr.s_addr; - req->opt0 = cpu_to_be64(opt0); + break; + case CHELSIO_T5: + t5req = (struct cpl_t5_act_open_req *)skb_put(skb, + wrlen); + INIT_TP_WR(t5req, 0); + req = (struct cpl_act_open_req *)t5req; + break; + case CHELSIO_T6: + t6req = (struct cpl_t6_act_open_req *)skb_put(skb, + wrlen); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req *)t6req; + t5req = (struct cpl_t5_act_open_req *)t6req; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid<<14) | ep->atid))); + req->local_port = la->sin_port; + req->peer_port = ra->sin_port; + req->local_ip = la->sin_addr.s_addr; + req->peer_ip = ra->sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { req->params = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); req->opt2 = cpu_to_be32(opt2); } else { + t5req->params = cpu_to_be64(FILTER_TUPLE_V( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); + t5req->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, t5req->rsvd); + t5req->opt2 = cpu_to_be32(opt2); + } + } else { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); - INIT_TP_WR(req6, 0); - OPCODE_TID(req6) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, - ((ep->rss_qid<<14)|ep->atid))); - req6->local_port = la6->sin6_port; - req6->peer_port = ra6->sin6_port; - req6->local_ip_hi = *((__be64 *) - (la6->sin6_addr.s6_addr)); - req6->local_ip_lo = *((__be64 *) - (la6->sin6_addr.s6_addr + 8)); - req6->peer_ip_hi = *((__be64 *) - (ra6->sin6_addr.s6_addr)); - req6->peer_ip_lo = *((__be64 *) - (ra6->sin6_addr.s6_addr + 8)); - req6->opt0 = cpu_to_be64(opt0); + break; + case CHELSIO_T5: + t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb, + wrlen); + INIT_TP_WR(t5req6, 0); + req6 = (struct cpl_act_open_req6 *)t5req6; + break; + case CHELSIO_T6: + t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb, + wrlen); + INIT_TP_WR(t6req6, 0); + req6 = (struct cpl_act_open_req6 *)t6req6; + t5req6 = (struct cpl_t5_act_open_req6 *)t6req6; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + req6->local_port = la6->sin6_port; + req6->peer_port = ra6->sin6_port; + req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr)); + req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8)); + req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr)); + req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8)); + req6->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { req6->params = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); req6->opt2 = cpu_to_be32(opt2); - } - } else { - u32 isn = (prandom_u32() & ~7UL) - 1; - - if (peer2peer) - isn += 4; - - if (ep->com.remote_addr.ss_family == AF_INET) { - t5_req = (struct cpl_t5_act_open_req *) - skb_put(skb, wrlen); - INIT_TP_WR(t5_req, 0); - OPCODE_TID(t5_req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, - ((ep->rss_qid << 14) | ep->atid))); - t5_req->local_port = la->sin_port; - t5_req->peer_port = ra->sin_port; - t5_req->local_ip = la->sin_addr.s_addr; - t5_req->peer_ip = ra->sin_addr.s_addr; - t5_req->opt0 = cpu_to_be64(opt0); - t5_req->params = cpu_to_be64(FILTER_TUPLE_V( - cxgb4_select_ntuple( - ep->com.dev->rdev.lldi.ports[0], - ep->l2t))); - t5_req->rsvd = cpu_to_be32(isn); - PDBG("%s snd_isn %u\n", __func__, - be32_to_cpu(t5_req->rsvd)); - t5_req->opt2 = cpu_to_be32(opt2); } else { - t5_req6 = (struct cpl_t5_act_open_req6 *) - skb_put(skb, wrlen); - INIT_TP_WR(t5_req6, 0); - OPCODE_TID(t5_req6) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, - ((ep->rss_qid<<14)|ep->atid))); - t5_req6->local_port = la6->sin6_port; - t5_req6->peer_port = ra6->sin6_port; - t5_req6->local_ip_hi = *((__be64 *) - (la6->sin6_addr.s6_addr)); - t5_req6->local_ip_lo = *((__be64 *) - (la6->sin6_addr.s6_addr + 8)); - t5_req6->peer_ip_hi = *((__be64 *) - (ra6->sin6_addr.s6_addr)); - t5_req6->peer_ip_lo = *((__be64 *) - (ra6->sin6_addr.s6_addr + 8)); - t5_req6->opt0 = cpu_to_be64(opt0); - t5_req6->params = cpu_to_be64(FILTER_TUPLE_V( - cxgb4_select_ntuple( + t5req6->params = cpu_to_be64(FILTER_TUPLE_V( + cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t))); - t5_req6->rsvd = cpu_to_be32(isn); - PDBG("%s snd_isn %u\n", __func__, - be32_to_cpu(t5_req6->rsvd)); - t5_req6->opt2 = cpu_to_be32(opt2); + t5req6->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd); + t5req6->opt2 = cpu_to_be32(opt2); } } set_bit(ACT_OPEN_REQ, &ep->com.history); - return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +clip_release: + if (ret && ep->com.remote_addr.ss_family == AF_INET6) + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&la6->sin6_addr.s6_addr, 1); + return ret; } static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, @@ -1169,6 +1222,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) if ((status == 0) || (status == -ECONNREFUSED)) { if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + @@ -1176,6 +1231,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) sizeof(struct mpa_v2_conn_params); } else { /* this means MPA_v1 is used */ + event.ord = cur_max_read_depth(ep->com.dev); + event.ird = cur_max_read_depth(ep->com.dev); event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); @@ -1238,8 +1295,8 @@ static void established_upcall(struct c4iw_ep *ep) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; - event.ird = ep->ird; - event.ord = ep->ord; + event.ird = ep->ord; + event.ord = ep->ird; if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); @@ -1871,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, struct dst_entry *dst, struct c4iw_dev *cdev, - bool clear_mpa_v1) + bool clear_mpa_v1, enum chip_type adapter_type) { struct neighbour *n; int err, step; @@ -1906,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, + cxgb4_port_viid(pdev)); step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -1925,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, + cxgb4_port_viid(pdev)); step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -1998,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) err = -EHOSTUNREACH; goto fail3; } - err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, + ep->com.dev->rdev.lldi.adapter_type); if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); goto fail4; @@ -2091,6 +2151,15 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_EXIST: if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + cxgb4_clip_release( + ep->com.dev->rdev.lldi.ports[0], + (const u32 *) + &sin6->sin6_addr.s6_addr, 1); + } remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); @@ -2118,6 +2187,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) connect_reply_upcall(ep, status2errno(status)); state_set(&ep->com, DEAD); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } if (status && act_open_has_tid(status)) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); @@ -2171,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, int wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(skb_cloned(skb)); skb_get(skb); rpl = cplhdr(skb); - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (!is_t4(adapter_type)) { skb_trim(skb, roundup(sizeof(*rpl5), 16)); rpl5 = (void *)rpl; INIT_TP_WR(rpl5, ep->hwtid); @@ -2224,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, const struct tcphdr *tcph; u32 hlen = ntohl(req->hdr_len); - tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + - IP_HDR_LEN_G(hlen); + if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5) + tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + + IP_HDR_LEN_G(hlen); + else + tcph = (const void *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen); if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); @@ -2260,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype, - __u8 *local_ip, __u8 *peer_ip, +static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, + int *iptype, __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, __be16 *peer_port) { - int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); struct tcphdr *tcp = (struct tcphdr *) @@ -2302,6 +2386,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) struct dst_entry *dst; __u8 local_ip[16], peer_ip[16]; __be16 local_port, peer_port; + struct sockaddr_in6 *sin6; int err; u16 peer_mss = ntohs(req->tcpopt.mss); int iptype; @@ -2319,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port); + get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, + local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -2354,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - err = import_ep(child_ep, iptype, peer_ip, dst, dev, false); + err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, + parent_ep->com.dev->rdev.lldi.adapter_type); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -2400,9 +2487,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) sin->sin_port = peer_port; sin->sin_addr.s_addr = *(__be32 *)peer_ip; } else { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &child_ep->com.mapped_local_addr; - + sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = local_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); @@ -2436,6 +2521,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); accept_cr(child_ep, skb, req); set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); + if (iptype == 6) { + sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; + cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } goto out; reject: reject_cr(dev, hwtid, skb); @@ -2672,6 +2762,15 @@ out: if (release) release_ep_resources(ep); else if (ep->retry_with_mpa_v1) { + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + cxgb4_clip_release( + ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, + 1); + } remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); @@ -2874,7 +2973,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (peer2peer && (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && - (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0) + (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) ep->ird = 1; } @@ -2976,7 +3075,7 @@ static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr; - if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { + if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { memcpy(la6->sin6_addr.s6_addr, &addr, 16); memcpy(ra6->sin6_addr.s6_addr, &addr, 16); return 0; @@ -3134,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail2; } - err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, + ep->com.dev->rdev.lldi.adapter_type); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); goto fail3; @@ -3186,6 +3286,9 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", err, ep->stid, sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); + else + cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); return err; } @@ -3202,6 +3305,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) sin->sin_addr.s_addr, sin->sin_port, 0, ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); if (err == -EBUSY) { + if (c4iw_fatal_error(&ep->com.dev->rdev)) { + err = -EIO; + break; + } set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(100)); } @@ -3334,6 +3441,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) ep->com.dev->rdev.lldi.ports[0], ep->stid, ep->com.dev->rdev.lldi.rxq_ids[0], 0); } else { + struct sockaddr_in6 *sin6; c4iw_init_wr_wait(&ep->com.wr_wait); err = cxgb4_remove_server( ep->com.dev->rdev.lldi.ports[0], ep->stid, @@ -3342,6 +3450,9 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) goto done; err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, __func__); + sin6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); } remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, @@ -3461,6 +3572,12 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, mutex_unlock(&dev->rdev.stats.lock); connect_reply_upcall(ep, status2errno(req->retval)); state_set(&ep->com, DEAD); + if (ep->com.remote_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + } remove_handle(dev, &dev->atid_idr, atid); cxgb4_free_atid(dev->rdev.lldi.tids, atid); dst_release(ep->dst); @@ -3525,20 +3642,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) { - u32 l2info; - u16 vlantag, len, hdr_len, eth_hdr_len; + __be32 l2info; + __be16 hdr_len, vlantag, len; + u16 eth_hdr_len; + int tcp_hdr_len, ip_hdr_len; u8 intf; struct cpl_rx_pkt *cpl = cplhdr(skb); struct cpl_pass_accept_req *req; struct tcp_options_received tmp_opt; struct c4iw_dev *dev; + enum chip_type type; dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); /* Store values from cpl_rx_pkt in temporary location. */ - vlantag = (__force u16) cpl->vlan; - len = (__force u16) cpl->len; - l2info = (__force u32) cpl->l2info; - hdr_len = (__force u16) cpl->hdr_len; + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; intf = cpl->iff; __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); @@ -3555,20 +3675,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) memset(req, 0, sizeof(*req)); req->l2info = cpu_to_be16(SYN_INTF_V(intf) | SYN_MAC_IDX_V(RX_MACIDX_G( - (__force int) htonl(l2info))) | + be32_to_cpu(l2info))) | SYN_XACT_MATCH_F); - eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? - RX_ETHHDR_LEN_G((__force int)htonl(l2info)) : - RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info)); - req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G( - (__force int) htonl(l2info))) | - TCP_HDR_LEN_V(RX_TCPHDR_LEN_G( - (__force int) htons(hdr_len))) | - IP_HDR_LEN_V(RX_IPHDR_LEN_G( - (__force int) htons(hdr_len))) | - ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len))); - req->vlan = (__force __be16) vlantag; - req->len = (__force __be16) len; + type = dev->rdev.lldi.adapter_type; + tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len)); + ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len)); + req->hdr_len = + cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info)))); + if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) { + eth_hdr_len = is_t4(type) ? + RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) : + RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) | + IP_HDR_LEN_V(ip_hdr_len) | + ETH_HDR_LEN_V(eth_hdr_len)); + } else { /* T6 and later */ + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) | + T6_IP_HDR_LEN_V(ip_hdr_len) | + T6_ETH_HDR_LEN_V(eth_hdr_len)); + } + req->vlan = vlantag; + req->len = len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | PASS_OPEN_TOS_V(tos)); req->tcpopt.mss = htons(tmp_opt.mss_clamp); @@ -3687,9 +3815,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? - RX_ETHHDR_LEN_G(htonl(cpl->l2info)) : - RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info)); + switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) { + case CHELSIO_T4: + eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T5: + eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T6: + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)); + goto reject; + } + if (eth_hdr_len == ETH_HLEN) { eh = (struct ethhdr *)(req + 1); iph = (struct iphdr *)(eh + 1); diff --git a/kernel/drivers/infiniband/hw/cxgb4/cq.c b/kernel/drivers/infiniband/hw/cxgb4/cq.c index 68ddb3710..de9cd6901 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/cq.c +++ b/kernel/drivers/infiniband/hw/cxgb4/cq.c @@ -156,19 +156,17 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, goto err4; cq->gen = 1; + cq->gts = rdev->lldi.gts_reg; cq->rdev = rdev; - if (user) { - u32 off = (cq->cqid << rdev->cqshift) & PAGE_MASK; - cq->ugts = (u64)rdev->bar2_pa + off; - } else if (is_t4(rdev->lldi.adapter_type)) { - cq->gts = rdev->lldi.gts_reg; - cq->qid_mask = -1U; - } else { - u32 off = ((cq->cqid << rdev->cqshift) & PAGE_MASK) + 12; - - cq->gts = rdev->bar2_kva + off; - cq->qid_mask = rdev->qpmask; + cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, + &cq->bar2_qid, + user ? &cq->bar2_pa : NULL); + if (user && !cq->bar2_va) { + pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), cq->cqid); + ret = -EINVAL; + goto err4; } return 0; err4: @@ -754,7 +752,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) wc->opcode = IB_WC_LOCAL_INV; break; case FW_RI_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; default: printk(KERN_ERR MOD "Unexpected opcode %d " @@ -816,7 +814,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for QPID=0x%0x\n", CQE_STATUS(&cqe), CQE_QPID(&cqe)); - ret = -EINVAL; + wc->status = IB_WC_FATAL_ERR; } } out: @@ -866,10 +864,13 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) return 0; } -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *ib_context, +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; @@ -879,6 +880,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, struct c4iw_mm_entry *mm, *mm2; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); rhp = to_c4iw_dev(ibdev); @@ -971,7 +974,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, insert_mmap(ucontext, mm); mm2->key = uresp.gts_key; - mm2->addr = chp->cq.ugts; + mm2->addr = chp->cq.bar2_pa; mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } diff --git a/kernel/drivers/infiniband/hw/cxgb4/device.c b/kernel/drivers/infiniband/hw/cxgb4/device.c index 7e895d714..58fce1742 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/device.c +++ b/kernel/drivers/infiniband/hw/cxgb4/device.c @@ -795,13 +795,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) goto err1; } - /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that qp. - */ - rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density); rdev->qpmask = rdev->lldi.udb_density - 1; - rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density); rdev->cqmask = rdev->lldi.ucq_density - 1; PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d " "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x " @@ -815,14 +809,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " - "qpmask 0x%x cqshift %lu cqmask 0x%x\n", + PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p " + "qpmask 0x%x cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), (void *)pci_resource_start(rdev->lldi.pdev, 2), - rdev->lldi.db_reg, - rdev->lldi.gts_reg, - rdev->qpshift, rdev->qpmask, - rdev->cqshift, rdev->cqmask); + rdev->lldi.db_reg, rdev->lldi.gts_reg, + rdev->qpmask, rdev->cqmask); if (c4iw_num_stags(rdev) == 0) { err = -EINVAL; @@ -970,12 +962,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.sge_egrstatuspagesize; /* - * For T5 devices, we map all of BAR2 with WC. + * For T5/T6 devices, we map all of BAR2 with WC. * For T4 devices with onchip qp mem, we map only that part * of BAR2 with WC. */ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); - if (is_t5(devp->rdev.lldi.adapter_type)) { + if (!is_t4(devp->rdev.lldi.adapter_type)) { devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, pci_resource_len(devp->rdev.lldi.pdev, 2)); if (!devp->rdev.bar2_kva) { @@ -1275,11 +1267,9 @@ static int enable_qp_db(int id, void *p, void *data) static void resume_rc_qp(struct c4iw_qp *qp) { spin_lock(&qp->lock); - t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, - is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL); qp->wq.sq.wq_pidx_inc = 0; - t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, - is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL); qp->wq.rq.wq_pidx_inc = 0; spin_unlock(&qp->lock); } diff --git a/kernel/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/kernel/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 97bb5550a..00e55faa0 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/kernel/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -165,9 +165,7 @@ struct wr_log_entry { struct c4iw_rdev { struct c4iw_resource resource; - unsigned long qpshift; u32 qpmask; - unsigned long cqshift; u32 cqmask; struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; @@ -388,6 +386,10 @@ struct c4iw_mr { struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *mpl; + dma_addr_t mpl_addr; + u32 max_mpl_len; + u32 mpl_len; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) @@ -407,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) return container_of(ibmw, struct c4iw_mw, ibmw); } -struct c4iw_fr_page_list { - struct ib_fast_reg_page_list ibpl; - DEFINE_DMA_UNMAP_ADDR(mapping); - dma_addr_t dma_addr; - struct c4iw_dev *dev; - int pll_len; -}; - -static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( - struct ib_fast_reg_page_list *ibpl) -{ - return container_of(ibpl, struct c4iw_fr_page_list, ibpl); -} - struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; @@ -968,11 +956,12 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list); -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len); -struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth); +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); +int c4iw_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int c4iw_dealloc_mw(struct ib_mw *mw); struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, @@ -992,10 +981,10 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int acc, u64 *iova_start); int c4iw_dereg_mr(struct ib_mr *ib_mr); int c4iw_destroy_cq(struct ib_cq *ib_cq); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, - struct ib_ucontext *ib_context, - struct ib_udata *udata); +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_destroy_qp(struct ib_qp *ib_qp); @@ -1032,6 +1021,9 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; diff --git a/kernel/drivers/infiniband/hw/cxgb4/mem.c b/kernel/drivers/infiniband/hw/cxgb4/mem.c index cff815b91..e1629ab58 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/mem.c +++ b/kernel/drivers/infiniband/hw/cxgb4/mem.c @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)&wr_wait; + req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -853,7 +853,9 @@ int c4iw_dealloc_mw(struct ib_mw *mw) return 0; } -struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -861,6 +863,11 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) u32 mmid; u32 stag = 0; int ret = 0; + int length = roundup(max_num_sg * sizeof(u64), 32); + + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > t4_max_fr_depth(use_dsgl)) + return ERR_PTR(-EINVAL); php = to_c4iw_pd(pd); rhp = php->rhp; @@ -870,11 +877,19 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) goto err; } + mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev, + length, &mhp->mpl_addr, GFP_KERNEL); + if (!mhp->mpl) { + ret = -ENOMEM; + goto err_mpl; + } + mhp->max_mpl_len = length; + mhp->rhp = rhp; - ret = alloc_pbl(mhp, pbl_depth); + ret = alloc_pbl(mhp, max_num_sg); if (ret) goto err1; - mhp->attr.pbl_size = pbl_depth; + mhp->attr.pbl_size = max_num_sg; ret = allocate_stag(&rhp->rdev, &stag, php->pdid, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (ret) @@ -899,54 +914,35 @@ err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); err1: + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); +err_mpl: kfree(mhp); err: return ERR_PTR(ret); } -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, - int page_list_len) +static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) { - struct c4iw_fr_page_list *c4pl; - struct c4iw_dev *dev = to_c4iw_dev(device); - dma_addr_t dma_addr; - int pll_len = roundup(page_list_len * sizeof(u64), 32); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL); - if (!c4pl) - return ERR_PTR(-ENOMEM); - - c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, - pll_len, &dma_addr, - GFP_KERNEL); - if (!c4pl->ibpl.page_list) { - kfree(c4pl); - return ERR_PTR(-ENOMEM); - } - dma_unmap_addr_set(c4pl, mapping, dma_addr); - c4pl->dma_addr = dma_addr; - c4pl->dev = dev; - c4pl->pll_len = pll_len; + if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + return -ENOMEM; - PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", - __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, - &c4pl->dma_addr); + mhp->mpl[mhp->mpl_len++] = addr; - return &c4pl->ibpl; + return 0; } -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) +int c4iw_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", - __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, - &c4pl->dma_addr); + mhp->mpl_len = 0; - dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->pll_len, - c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); - kfree(c4pl); + return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); } int c4iw_dereg_mr(struct ib_mr *ib_mr) @@ -964,6 +960,9 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; remove_handle(rhp, &rhp->mmidr, mmid); + if (mhp->mpl) + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (mhp->attr.pbl_size) diff --git a/kernel/drivers/infiniband/hw/cxgb4/provider.c b/kernel/drivers/infiniband/hw/cxgb4/provider.c index 66bd6a2ad..0a7d99818 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/provider.c +++ b/kernel/drivers/infiniband/hw/cxgb4/provider.c @@ -80,9 +80,13 @@ static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, - struct ib_grh *in_grh, struct ib_mad *in_mad, - struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { return -ENOSYS; } @@ -205,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (addr >= rdev->oc_mw_pa) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); else { - if (is_t5(rdev->lldi.adapter_type)) + if (!is_t4(rdev->lldi.adapter_type)) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); else @@ -301,13 +305,17 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -static int c4iw_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct c4iw_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + dev = to_c4iw_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); @@ -445,10 +453,10 @@ static int c4iw_get_mib(struct ib_device *ibdev, cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); memset(stats, 0, sizeof *stats); - stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs; - stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs; - stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs; - stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutSegs; + stats->iw.tcpInSegs = v4.tcp_in_segs + v6.tcp_in_segs; + stats->iw.tcpOutSegs = v4.tcp_out_segs + v6.tcp_out_segs; + stats->iw.tcpRetransSegs = v4.tcp_retrans_segs + v6.tcp_retrans_segs; + stats->iw.tcpOutRsts = v4.tcp_out_rsts + v6.tcp_out_rsts; return 0; } @@ -465,6 +473,23 @@ static struct device_attribute *c4iw_class_attributes[] = { &dev_attr_board_id, }; +static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = c4iw_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int c4iw_register_device(struct c4iw_dev *dev) { int ret; @@ -531,9 +556,8 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.alloc_mw = c4iw_alloc_mw; dev->ibdev.bind_mw = c4iw_bind_mw; dev->ibdev.dealloc_mw = c4iw_dealloc_mw; - dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr; - dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; - dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl; + dev->ibdev.alloc_mr = c4iw_alloc_mr; + dev->ibdev.map_mr_sg = c4iw_map_mr_sg; dev->ibdev.attach_mcast = c4iw_multicast_attach; dev->ibdev.detach_mcast = c4iw_multicast_detach; dev->ibdev.process_mad = c4iw_process_mad; @@ -542,6 +566,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.post_recv = c4iw_post_receive; dev->ibdev.get_protocol_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; + dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/kernel/drivers/infiniband/hw/cxgb4/qp.c b/kernel/drivers/infiniband/hw/cxgb4/qp.c index 389ced335..aa515afee 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/qp.c +++ b/kernel/drivers/infiniband/hw/cxgb4/qp.c @@ -165,6 +165,29 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; } +/* + * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL, + * then this is a user mapping so compute the page-aligned physical address + * for mapping. + */ +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa) +{ + u64 bar2_qoffset; + int ret; + + ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype, + pbar2_pa ? 1 : 0, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + if (pbar2_pa) + *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + return rdev->bar2_kva + bar2_qoffset; +} + static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) @@ -236,25 +259,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; - wq->gts = rdev->lldi.gts_reg; - if (user || is_t5(rdev->lldi.adapter_type)) { - u32 off; - off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; - if (user) { - wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); - } else { - off += 128 * (wq->sq.qid & rdev->qpmask) + 8; - wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); - } - off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; - if (user) { - wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); - } else { - off += 128 * (wq->rq.qid & rdev->qpmask) + 8; - wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); - } + wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->sq.bar2_qid, + user ? &wq->sq.bar2_pa : NULL); + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { + pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); + goto free_dma; } + wq->rdev = rdev; wq->rq.msn = 1; @@ -336,10 +357,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (ret) goto free_dma; - PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n", + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, - (__force unsigned long) wq->sq.udb, - (__force unsigned long) wq->rq.udb); + wq->sq.bar2_va, wq->rq.bar2_va); return 0; free_dma: @@ -508,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, @@ -546,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) if (wr->num_sge > 1) return -EINVAL; if (wr->num_sge) { - wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32)); - wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr @@ -585,47 +605,41 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } -static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16, u8 t5dev) +static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_reg_wr *wr, u8 *len16, u8 t5dev) { - + struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; __be64 *p; int i; - int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); int rem; - if (wr->wr.fast_reg.page_list_len > - t4_max_fr_depth(use_dsgl)) + if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; - wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; + wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12; wqe->fr.addr_type = FW_RI_VA_BASED_TO; - wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access); wqe->fr.len_hi = 0; - wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); - wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & + wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length); + wqe->fr.stag = cpu_to_be32(wr->key); + wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); struct fw_ri_dsgl *sglp; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - wr->wr.fast_reg.page_list->page_list[i] = (__force u64) - cpu_to_be64((u64) - wr->wr.fast_reg.page_list->page_list[i]); - } + for (i = 0; i < mhp->mpl_len; i++) + mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]); sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); sglp->op = FW_RI_DATA_DSGL; sglp->r1 = 0; sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); + sglp->addr0 = cpu_to_be64(mhp->mpl_addr); sglp->len0 = cpu_to_be32(pbllen); *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); @@ -637,9 +651,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, imdp->immdlen = cpu_to_be32(pbllen); p = (__be64 *)(imdp + 1); rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); + for (i = 0; i < mhp->mpl_len; i++) { + *p = cpu_to_be64((u64)mhp->mpl[i]); rem -= sizeof(*p); if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; @@ -692,8 +705,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) spin_lock_irqsave(&qhp->rhp->lock, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) - t4_ring_sq_db(&qhp->wq, inc, - is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_sq_db(&qhp->wq, inc, NULL); else { add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); qhp->wq.sq.wq_pidx_inc += inc; @@ -710,8 +722,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) spin_lock_irqsave(&qhp->rhp->lock, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) - t4_ring_rq_db(&qhp->wq, inc, - is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_rq_db(&qhp->wq, inc, NULL); else { add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); qhp->wq.rq.wq_pidx_inc += inc; @@ -793,13 +804,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, - is_t5( - qhp->rhp->rdev.lldi.adapter_type) ? - 1 : 0); + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, + is_t5( + qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -840,8 +851,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } if (!qhp->rhp->rdev.status_page->db_off) { - t4_ring_sq_db(&qhp->wq, idx, - is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { spin_unlock_irqrestore(&qhp->lock, flag); @@ -914,8 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } if (!qhp->rhp->rdev.status_page->db_off) { - t4_ring_rq_db(&qhp->wq, idx, - is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + t4_ring_rq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { spin_unlock_irqrestore(&qhp->lock, flag); @@ -1766,11 +1775,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long)qhp->wq.sq.udb; + mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long)qhp->wq.rq.udb; + mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { @@ -1855,7 +1864,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; - if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) return -EINVAL; diff --git a/kernel/drivers/infiniband/hw/cxgb4/t4.h b/kernel/drivers/infiniband/hw/cxgb4/t4.h index 7f2a6c244..1092a2d1f 100644 --- a/kernel/drivers/infiniband/hw/cxgb4/t4.h +++ b/kernel/drivers/infiniband/hw/cxgb4/t4.h @@ -33,6 +33,7 @@ #include "t4_hw.h" #include "t4_regs.h" +#include "t4_values.h" #include "t4_msg.h" #include "t4fw_ri_api.h" @@ -290,8 +291,10 @@ struct t4_sq { unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; - u64 __iomem *udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u16 in_use; u16 size; @@ -314,8 +317,10 @@ struct t4_rq { dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swrqe *sw_rq; - u64 __iomem *udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u32 msn; u32 rqt_hwaddr; @@ -332,7 +337,6 @@ struct t4_wq { struct t4_sq sq; struct t4_rq rq; void __iomem *db; - void __iomem *gts; struct c4iw_rdev *rdev; int flushed; }; @@ -451,21 +455,23 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } -static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, - union t4_wr *wqe) +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { /* Flush host queue memory writes. */ wmb(); - if (t5) { - if (inc == 1 && wqe) { + if (wq->sq.bar2_va) { + if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) { PDBG("%s: WC wq->sq.pidx = %d\n", __func__, wq->sq.pidx); - pio_copy(wq->sq.udb + 7, (void *)wqe); + pio_copy((u64 __iomem *) + (wq->sq.bar2_va + SGE_UDB_WCDOORBELL), + (u64 *)wqe); } else { PDBG("%s: DB wq->sq.pidx = %d\n", __func__, wq->sq.pidx); - writel(PIDX_T5_V(inc), wq->sq.udb); + writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid), + wq->sq.bar2_va + SGE_UDB_KDOORBELL); } /* Flush user doorbell area writes. */ @@ -475,21 +481,24 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db); } -static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, union t4_recv_wr *wqe) { /* Flush host queue memory writes. */ wmb(); - if (t5) { - if (inc == 1 && wqe) { + if (wq->rq.bar2_va) { + if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) { PDBG("%s: WC wq->rq.pidx = %d\n", __func__, wq->rq.pidx); - pio_copy(wq->rq.udb + 7, (void *)wqe); + pio_copy((u64 __iomem *) + (wq->rq.bar2_va + SGE_UDB_WCDOORBELL), + (void *)wqe); } else { PDBG("%s: DB wq->rq.pidx = %d\n", __func__, wq->rq.pidx); - writel(PIDX_T5_V(inc), wq->rq.udb); + writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid), + wq->rq.bar2_va + SGE_UDB_KDOORBELL); } /* Flush user doorbell area writes. */ @@ -534,8 +543,10 @@ struct t4_cq { DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_cqe *sw_queue; void __iomem *gts; + void __iomem *bar2_va; + u64 bar2_pa; + u32 bar2_qid; struct c4iw_rdev *rdev; - u64 ugts; size_t memsize; __be64 bits_type_ts; u32 cqid; @@ -552,6 +563,15 @@ struct t4_cq { unsigned long flags; }; +static inline void write_gts(struct t4_cq *cq, u32 val) +{ + if (cq->bar2_va) + writel(val | INGRESSQID_V(cq->bar2_qid), + cq->bar2_va + SGE_UDB_GTS); + else + writel(val | INGRESSQID_V(cq->cqid), cq->gts); +} + static inline int t4_clear_cq_armed(struct t4_cq *cq) { return test_and_clear_bit(CQ_ARMED, &cq->flags); @@ -563,14 +583,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_M) { - val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid & cq->qid_mask); - writel(val, cq->gts); + val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7); + write_gts(cq, val); cq->cidx_inc -= CIDXINC_M; } - val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) | - INGRESSQID_V(cq->cqid & cq->qid_mask); - writel(val, cq->gts); + val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6); + write_gts(cq, val); cq->cidx_inc = 0; return 0; } @@ -601,9 +619,8 @@ static inline void t4_hwcq_consume(struct t4_cq *cq) if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_M) { u32 val; - val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid & cq->qid_mask); - writel(val, cq->gts); + val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7); + write_gts(cq, val); cq->cidx_inc = 0; } if (++cq->cidx == cq->size) { diff --git a/kernel/drivers/infiniband/hw/ehca/Kconfig b/kernel/drivers/infiniband/hw/ehca/Kconfig deleted file mode 100644 index 59f807d8d..000000000 --- a/kernel/drivers/infiniband/hw/ehca/Kconfig +++ /dev/null @@ -1,9 +0,0 @@ -config INFINIBAND_EHCA - tristate "eHCA support" - depends on IBMEBUS - ---help--- - This driver supports the IBM pSeries eHCA InfiniBand adapter. - - To compile the driver as a module, choose M here. The module - will be called ib_ehca. - diff --git a/kernel/drivers/infiniband/hw/ehca/Makefile b/kernel/drivers/infiniband/hw/ehca/Makefile deleted file mode 100644 index 74d284e46..000000000 --- a/kernel/drivers/infiniband/hw/ehca/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# Authors: Heiko J Schick <schickhj@de.ibm.com> -# Christoph Raisch <raisch@de.ibm.com> -# Joachim Fenkes <fenkes@de.ibm.com> -# -# Copyright (c) 2005 IBM Corporation -# -# All rights reserved. -# -# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. - -obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o - -ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ - ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ - ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o - diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_av.c b/kernel/drivers/infiniband/hw/ehca/ehca_av.c deleted file mode 100644 index 465926319..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_av.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * address vector functions - * - * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Khadija Souissi <souissik@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static struct kmem_cache *av_cache; - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd) -{ - int path = ib_rate_to_mult(path_rate); - int link, ret; - struct ib_port_attr pa; - - if (path_rate == IB_RATE_PORT_CURRENT) { - *ipd = 0; - return 0; - } - - if (unlikely(path < 0)) { - ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", - path_rate); - return -EINVAL; - } - - ret = ehca_query_port(&shca->ib_device, port, &pa); - if (unlikely(ret < 0)) { - ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); - return ret; - } - - link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - - if (path >= link) - /* no need to throttle if path faster than link */ - *ipd = 0; - else - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; - - return 0; -} - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) -{ - int ret; - struct ehca_av *av; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - - av = kmem_cache_alloc(av_cache, GFP_KERNEL); - if (!av) { - ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", - pd, ah_attr); - return ERR_PTR(-ENOMEM); - } - - av->av.sl = ah_attr->sl; - av->av.dlid = ah_attr->dlid; - av->av.slid_path_bits = ah_attr->src_path_bits; - - if (ehca_static_rate < 0) { - u32 ipd; - if (ehca_calc_ipd(shca, ah_attr->port_num, - ah_attr->static_rate, &ipd)) { - ret = -EINVAL; - goto create_ah_exit1; - } - av->av.ipd = ipd; - } else - av->av.ipd = ehca_static_rate; - - av->av.lnh = ah_attr->ah_flags; - av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(pd->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ret = -EINVAL; - ehca_err(pd->device, "Invalid port number " - "ehca_query_port() returned %x " - "pd=%p ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memset(&gid, 0, sizeof(gid)); - rc = ehca_query_gid(pd->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ret = -EINVAL; - ehca_err(pd->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "pd=%p ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); - } - av->av.pmtu = shca->max_mtu; - - /* dgid comes in grh.word_3 */ - memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - return &av->ib_ah; - -create_ah_exit1: - kmem_cache_free(av_cache, av); - - return ERR_PTR(ret); -} - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av; - struct ehca_ud_av new_ehca_av; - struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, - ib_device); - - memset(&new_ehca_av, 0, sizeof(new_ehca_av)); - new_ehca_av.sl = ah_attr->sl; - new_ehca_av.dlid = ah_attr->dlid; - new_ehca_av.slid_path_bits = ah_attr->src_path_bits; - new_ehca_av.ipd = ah_attr->static_rate; - new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, - (ah_attr->ah_flags & IB_AH_GRH) > 0); - new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); - - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(ah->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ehca_err(ah->device, "Invalid port number " - "ehca_query_port() returned %x " - "ah=%p ah_attr=%p port_num=%x", - rc, ah, ah_attr, ah_attr->port_num); - return -EINVAL; - } - memset(&gid, 0, sizeof(gid)); - rc = ehca_query_gid(ah->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ehca_err(ah->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "ah=%p ah_attr=%p port_num=%x " - "sgid_index=%x", - rc, ah, ah_attr, ah_attr->port_num, - ah_attr->grh.sgid_index); - return -EINVAL; - } - memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); - } - - new_ehca_av.pmtu = shca->max_mtu; - - memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - av = container_of(ah, struct ehca_av, ib_ah); - av->av = new_ehca_av; - - return 0; -} - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); - - memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, - sizeof(ah_attr->grh.dgid)); - ah_attr->sl = av->av.sl; - - ah_attr->dlid = av->av.dlid; - - ah_attr->src_path_bits = av->av.slid_path_bits; - ah_attr->static_rate = av->av.ipd; - ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); - ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, - av->av.grh.word_0); - ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, - av->av.grh.word_0); - ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, - av->av.grh.word_0); - - return 0; -} - -int ehca_destroy_ah(struct ib_ah *ah) -{ - kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); - - return 0; -} - -int ehca_init_av_cache(void) -{ - av_cache = kmem_cache_create("ehca_cache_av", - sizeof(struct ehca_av), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!av_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_av_cache(void) -{ - if (av_cache) - kmem_cache_destroy(av_cache); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_classes.h b/kernel/drivers/infiniband/hw/ehca/ehca_classes.h deleted file mode 100644 index bd45e0f39..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_classes.h +++ /dev/null @@ -1,482 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Struct definition for eHCA internal structures - * - * Authors: Heiko J Schick <schickhj@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * Joachim Fenkes <fenkes@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_CLASSES_H__ -#define __EHCA_CLASSES_H__ - -struct ehca_module; -struct ehca_qp; -struct ehca_cq; -struct ehca_eq; -struct ehca_mr; -struct ehca_mw; -struct ehca_pd; -struct ehca_av; - -#include <linux/wait.h> -#include <linux/mutex.h> - -#include <rdma/ib_verbs.h> -#include <rdma/ib_user_verbs.h> - -#ifdef CONFIG_PPC64 -#include "ehca_classes_pSeries.h" -#endif -#include "ipz_pt_fn.h" -#include "ehca_qes.h" -#include "ehca_irq.h" - -#define EHCA_EQE_CACHE_SIZE 20 -#define EHCA_MAX_NUM_QUEUES 0xffff - -struct ehca_eqe_cache_entry { - struct ehca_eqe *eqe; - struct ehca_cq *cq; -}; - -struct ehca_eq { - u32 length; - struct ipz_queue ipz_queue; - struct ipz_eq_handle ipz_eq_handle; - struct work_struct work; - struct h_galpas galpas; - int is_initialized; - struct ehca_pfeq pf; - spinlock_t spinlock; - struct tasklet_struct interrupt_task; - u32 ist; - spinlock_t irq_spinlock; - struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; -}; - -struct ehca_sma_attr { - u16 lid, lmc, sm_sl, sm_lid; - u16 pkey_tbl_len, pkeys[16]; -}; - -struct ehca_sport { - struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_sqp[2]; - /* lock to serialze modify_qp() calls for sqp in normal - * and irq path (when event PORT_ACTIVE is received first time) - */ - spinlock_t mod_sqp_lock; - enum ib_port_state port_state; - struct ehca_sma_attr saved_attr; - u32 pma_qp_nr; -}; - -#define HCA_CAP_MR_PGSIZE_4K 0x80000000 -#define HCA_CAP_MR_PGSIZE_64K 0x40000000 -#define HCA_CAP_MR_PGSIZE_1M 0x20000000 -#define HCA_CAP_MR_PGSIZE_16M 0x10000000 - -struct ehca_shca { - struct ib_device ib_device; - struct platform_device *ofdev; - u8 num_ports; - int hw_level; - struct list_head shca_list; - struct ipz_adapter_handle ipz_hca_handle; - struct ehca_sport sport[2]; - struct ehca_eq eq; - struct ehca_eq neq; - struct ehca_mr *maxmr; - struct ehca_pd *pd; - struct h_galpas galpas; - struct mutex modify_mutex; - u64 hca_cap; - /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ - u32 hca_cap_mr_pgsize; - int max_mtu; - int max_num_qps; - int max_num_cqs; - atomic_t num_cqs; - atomic_t num_qps; -}; - -struct ehca_pd { - struct ib_pd ib_pd; - struct ipz_pd fw_pd; - /* small queue mgmt */ - struct mutex lock; - struct list_head free[2]; - struct list_head full[2]; -}; - -enum ehca_ext_qp_type { - EQPT_NORMAL = 0, - EQPT_LLQP = 1, - EQPT_SRQBASE = 2, - EQPT_SRQ = 3, -}; - -/* struct to cache modify_qp()'s parms for GSI/SMI qp */ -struct ehca_mod_qp_parm { - int mask; - struct ib_qp_attr attr; -}; - -#define EHCA_MOD_QP_PARM_MAX 4 - -#define QMAP_IDX_MASK 0xFFFFULL - -/* struct for tracking if cqes have been reported to the application */ -struct ehca_qmap_entry { - u16 app_wr_id; - u8 reported; - u8 cqe_req; -}; - -struct ehca_queue_map { - struct ehca_qmap_entry *map; - unsigned int entries; - unsigned int tail; - unsigned int left_to_poll; - unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ -}; - -/* function to calculate the next index for the qmap */ -static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) -{ - unsigned int temp = cur_index + 1; - return (temp == limit) ? 0 : temp; -} - -struct ehca_qp { - union { - struct ib_qp ib_qp; - struct ib_srq ib_srq; - }; - u32 qp_type; - enum ehca_ext_qp_type ext_type; - enum ib_qp_state state; - struct ipz_queue ipz_squeue; - struct ehca_queue_map sq_map; - struct ipz_queue ipz_rqueue; - struct ehca_queue_map rq_map; - struct h_galpas galpas; - u32 qkey; - u32 real_qp_num; - u32 token; - spinlock_t spinlock_s; - spinlock_t spinlock_r; - u32 sq_max_inline_data_size; - struct ipz_qp_handle ipz_qp_handle; - struct ehca_pfqp pf; - struct ib_qp_init_attr init_attr; - struct ehca_cq *send_cq; - struct ehca_cq *recv_cq; - unsigned int sqerr_purgeflag; - struct hlist_node list_entries; - /* array to cache modify_qp()'s parms for GSI/SMI qp */ - struct ehca_mod_qp_parm *mod_qp_parm; - int mod_qp_parm_idx; - /* mmap counter for resources mapped into user space */ - u32 mm_count_squeue; - u32 mm_count_rqueue; - u32 mm_count_galpa; - /* unsolicited ack circumvention */ - int unsol_ack_circ; - int mtu_shift; - u32 message_count; - u32 packet_count; - atomic_t nr_events; /* events seen */ - wait_queue_head_t wait_completion; - int mig_armed; - struct list_head sq_err_node; - struct list_head rq_err_node; -}; - -#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) -#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) -#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) - -/* must be power of 2 */ -#define QP_HASHTAB_LEN 8 - -struct ehca_cq { - struct ib_cq ib_cq; - struct ipz_queue ipz_queue; - struct h_galpas galpas; - spinlock_t spinlock; - u32 cq_number; - u32 token; - u32 nr_of_entries; - struct ipz_cq_handle ipz_cq_handle; - struct ehca_pfcq pf; - spinlock_t cb_lock; - struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; - struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - atomic_t nr_events; /* #events seen */ - wait_queue_head_t wait_completion; - spinlock_t task_lock; - /* mmap counter for resources mapped into user space */ - u32 mm_count_queue; - u32 mm_count_galpa; - struct list_head sqp_err_list; - struct list_head rqp_err_list; -}; - -enum ehca_mr_flag { - EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ - EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ -}; - -struct ehca_mr { - union { - struct ib_mr ib_mr; /* must always be first in ehca_mr */ - struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ - } ib; - struct ib_umem *umem; - spinlock_t mrlock; - - enum ehca_mr_flag flags; - u32 num_kpages; /* number of kernel pages */ - u32 num_hwpages; /* number of hw pages to form MR */ - u64 hwpage_size; /* hw page size used for this MR */ - int acl; /* ACL (stored here for usage in reregister) */ - u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ - u64 size; /* size (stored here for usage in reregister) */ - u32 fmr_page_size; /* page size for FMR */ - u32 fmr_max_pages; /* max pages for FMR */ - u32 fmr_max_maps; /* max outstanding maps for FMR */ - u32 fmr_map_cnt; /* map counter for FMR */ - /* fw specific data */ - struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ - struct h_galpas galpas; -}; - -struct ehca_mw { - struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ - spinlock_t mwlock; - - u8 never_bound; /* indication MW was never bound */ - struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ - struct h_galpas galpas; -}; - -enum ehca_mr_pgi_type { - EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, - * ehca_rereg_phys_mr, - * ehca_reg_internal_maxmr */ - EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ - EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ -}; - -struct ehca_mr_pginfo { - enum ehca_mr_pgi_type type; - u64 num_kpages; - u64 kpage_cnt; - u64 hwpage_size; /* hw page size used for this MR */ - u64 num_hwpages; /* number of hw pages */ - u64 hwpage_cnt; /* counter for hw pages */ - u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ - - union { - struct { /* type EHCA_MR_PGI_PHYS section */ - int num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - } phy; - struct { /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct scatterlist *next_sg; - u64 next_nmap; - } usr; - struct { /* type EHCA_MR_PGI_FMR section */ - u64 fmr_pgsize; - u64 *page_list; - u64 next_listelem; - } fmr; - } u; -}; - -/* output parameters for MR/FMR hipz calls */ -struct ehca_mr_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 lkey; - u32 rkey; - u64 len; - u64 vaddr; - u32 acl; -}; - -/* output parameters for MW hipz calls */ -struct ehca_mw_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 rkey; -}; - -struct ehca_av { - struct ib_ah ib_ah; - struct ehca_ud_av av; -}; - -struct ehca_ucontext { - struct ib_ucontext ib_ucontext; -}; - -int ehca_init_pd_cache(void); -void ehca_cleanup_pd_cache(void); -int ehca_init_cq_cache(void); -void ehca_cleanup_cq_cache(void); -int ehca_init_qp_cache(void); -void ehca_cleanup_qp_cache(void); -int ehca_init_av_cache(void); -void ehca_cleanup_av_cache(void); -int ehca_init_mrmw_cache(void); -void ehca_cleanup_mrmw_cache(void); -int ehca_init_small_qp_cache(void); -void ehca_cleanup_small_qp_cache(void); - -extern rwlock_t ehca_qp_idr_lock; -extern rwlock_t ehca_cq_idr_lock; -extern struct idr ehca_qp_idr; -extern struct idr ehca_cq_idr; -extern spinlock_t shca_list_lock; - -extern int ehca_static_rate; -extern int ehca_port_act_time; -extern bool ehca_use_hp_mr; -extern bool ehca_scaling_code; -extern int ehca_lock_hcalls; -extern int ehca_nr_ports; -extern int ehca_max_cq; -extern int ehca_max_qp; - -struct ipzu_queue_resp { - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; - u32 offset; /* save offset within a page for small_qp */ -}; - -struct ehca_create_cq_resp { - u32 cq_number; - u32 token; - struct ipzu_queue_resp ipz_queue; - u32 fw_handle_ofs; - u32 dummy; -}; - -struct ehca_create_qp_resp { - u32 qp_num; - u32 token; - u32 qp_type; - u32 ext_type; - u32 qkey; - /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ - u32 real_qp_num; - u32 fw_handle_ofs; - u32 dummy; - struct ipzu_queue_resp ipz_squeue; - struct ipzu_queue_resp ipz_rqueue; -}; - -struct ehca_alloc_cq_parms { - u32 nr_cqe; - u32 act_nr_of_entries; - u32 act_pages; - struct ipz_eq_handle eq_handle; -}; - -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3, -}; - -enum ehca_ll_comp_flags { - LLQP_SEND_COMP = 0x20, - LLQP_RECV_COMP = 0x40, - LLQP_COMP_MASK = 0x60, -}; - -struct ehca_alloc_queue_parms { - /* input parameters */ - int max_wr; - int max_sge; - int page_size; - int is_small; - - /* output parameters */ - u16 act_nr_wqes; - u8 act_nr_sges; - u32 queue_size; /* bytes for small queues, pages otherwise */ -}; - -struct ehca_alloc_qp_parms { - struct ehca_alloc_queue_parms squeue; - struct ehca_alloc_queue_parms rqueue; - - /* input parameters */ - enum ehca_service_type servicetype; - int qp_storage; - int sigtype; - enum ehca_ext_qp_type ext_type; - enum ehca_ll_comp_flags ll_comp_flags; - int ud_av_l_key_ctl; - - u32 token; - struct ipz_eq_handle eq_handle; - struct ipz_pd pd; - struct ipz_cq_handle send_cq_handle, recv_cq_handle; - - u32 srq_qpn, srq_token, srq_limit; - - /* output parameters */ - u32 real_qp_num; - struct ipz_qp_handle qp_handle; - struct h_galpas galpas; -}; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); - -#endif diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/kernel/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h deleted file mode 100644 index 689c35786..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * pSeries interface definitions - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_CLASSES_PSERIES_H__ -#define __EHCA_CLASSES_PSERIES_H__ - -#include "hcp_phyp.h" -#include "ipz_pt_fn.h" - - -struct ehca_pfqp { - struct ipz_qpt sqpt; - struct ipz_qpt rqpt; -}; - -struct ehca_pfcq { - struct ipz_qpt qpt; - u32 cqnr; -}; - -struct ehca_pfeq { - struct ipz_qpt qpt; - struct h_galpa galpa; - u32 eqnr; -}; - -struct ipz_adapter_handle { - u64 handle; -}; - -struct ipz_cq_handle { - u64 handle; -}; - -struct ipz_eq_handle { - u64 handle; -}; - -struct ipz_qp_handle { - u64 handle; -}; -struct ipz_mrmw_handle { - u64 handle; -}; - -struct ipz_pd { - u32 value; -}; - -struct hcp_modify_qp_control_block { - u32 qkey; /* 00 */ - u32 rdd; /* reliable datagram domain */ - u32 send_psn; /* 02 */ - u32 receive_psn; /* 03 */ - u32 prim_phys_port; /* 04 */ - u32 alt_phys_port; /* 05 */ - u32 prim_p_key_idx; /* 06 */ - u32 alt_p_key_idx; /* 07 */ - u32 rdma_atomic_ctrl; /* 08 */ - u32 qp_state; /* 09 */ - u32 reserved_10; /* 10 */ - u32 rdma_nr_atomic_resp_res; /* 11 */ - u32 path_migration_state; /* 12 */ - u32 rdma_atomic_outst_dest_qp; /* 13 */ - u32 dest_qp_nr; /* 14 */ - u32 min_rnr_nak_timer_field; /* 15 */ - u32 service_level; /* 16 */ - u32 send_grh_flag; /* 17 */ - u32 retry_count; /* 18 */ - u32 timeout; /* 19 */ - u32 path_mtu; /* 20 */ - u32 max_static_rate; /* 21 */ - u32 dlid; /* 22 */ - u32 rnr_retry_count; /* 23 */ - u32 source_path_bits; /* 24 */ - u32 traffic_class; /* 25 */ - u32 hop_limit; /* 26 */ - u32 source_gid_idx; /* 27 */ - u32 flow_label; /* 28 */ - u32 reserved_29; /* 29 */ - union { /* 30 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid; - u32 service_level_al; /* 34 */ - u32 send_grh_flag_al; /* 35 */ - u32 retry_count_al; /* 36 */ - u32 timeout_al; /* 37 */ - u32 max_static_rate_al; /* 38 */ - u32 dlid_al; /* 39 */ - u32 rnr_retry_count_al; /* 40 */ - u32 source_path_bits_al; /* 41 */ - u32 traffic_class_al; /* 42 */ - u32 hop_limit_al; /* 43 */ - u32 source_gid_idx_al; /* 44 */ - u32 flow_label_al; /* 45 */ - u32 reserved_46; /* 46 */ - u32 reserved_47; /* 47 */ - union { /* 48 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid_al; - u32 max_nr_outst_send_wr; /* 52 */ - u32 max_nr_outst_recv_wr; /* 53 */ - u32 disable_ete_credit_check; /* 54 */ - u32 qp_number; /* 55 */ - u64 send_queue_handle; /* 56 */ - u64 recv_queue_handle; /* 58 */ - u32 actual_nr_sges_in_sq_wqe; /* 60 */ - u32 actual_nr_sges_in_rq_wqe; /* 61 */ - u32 qp_enable; /* 62 */ - u32 curr_srq_limit; /* 63 */ - u64 qp_aff_asyn_ev_log_reg; /* 64 */ - u64 shared_rq_hndl; /* 66 */ - u64 trigg_doorbell_qp_hndl; /* 68 */ - u32 reserved_70_127[58]; /* 70 */ -}; - -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) - -#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_cq.c b/kernel/drivers/infiniband/hw/ehca/ehca_cq.c deleted file mode 100644 index 8cc837537..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_cq.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Completion queue handling - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Khadija Souissi <souissi@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> - -#include "ehca_iverbs.h" -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "hcp_if.h" - -static struct kmem_cache *cq_cache; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) -{ - unsigned int qp_num = qp->real_qp_num; - unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, flags); - - ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", - cq->cq_number, qp_num); - - return 0; -} - -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) -{ - int ret = -EINVAL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - hlist_del(iter); - ehca_dbg(cq->ib_cq.device, - "removed qp from cq .cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - ret = 0; - break; - } - } - spin_unlock_irqrestore(&cq->spinlock, flags); - if (ret) - ehca_err(cq->ib_cq.device, - "qp not found cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - - return ret; -} - -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) -{ - struct ehca_qp *ret = NULL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - ret = qp; - break; - } - } - return ret; -} - -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - static const u32 additional_cqe = 20; - struct ib_cq *cq; - struct ehca_cq *my_cq; - struct ehca_shca *shca = - container_of(device, struct ehca_shca, ib_device); - struct ipz_adapter_handle adapter_handle; - struct ehca_alloc_cq_parms param; /* h_call's out parameters */ - struct h_galpa gal; - void *vpage; - u32 counter; - u64 rpage, cqx_fec, h_ret; - int ipz_rc, i; - unsigned long flags; - - if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) - return ERR_PTR(-EINVAL); - - if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { - ehca_err(device, "Unable to create CQ, max number of %i " - "CQs reached.", shca->max_num_cqs); - ehca_err(device, "To increase the maximum number of CQs " - "use the number_of_cqs module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); - if (!my_cq) { - ehca_err(device, "Out of memory for ehca_cq struct device=%p", - device); - atomic_dec(&shca->num_cqs); - return ERR_PTR(-ENOMEM); - } - - memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); - - spin_lock_init(&my_cq->spinlock); - spin_lock_init(&my_cq->cb_lock); - spin_lock_init(&my_cq->task_lock); - atomic_set(&my_cq->nr_events, 0); - init_waitqueue_head(&my_cq->wait_completion); - - cq = &my_cq->ib_cq; - - adapter_handle = shca->ipz_hca_handle; - param.eq_handle = shca->eq.ipz_eq_handle; - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_cq_idr_lock, flags); - my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - idr_preload_end(); - - if (my_cq->token < 0) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't allocate new idr entry. device=%p", - device); - goto create_cq_exit1; - } - - /* - * CQs maximum depth is 4GB-64, but we need additional 20 as buffer - * for receiving errors CQEs. - */ - param.nr_cqe = cqe + additional_cqe; - h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); - - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_alloc_resource_cq() failed " - "h_ret=%lli device=%p", h_ret, device); - cq = ERR_PTR(ehca2ib_return_code(h_ret)); - goto create_cq_exit2; - } - - ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, - EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); - if (!ipz_rc) { - ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", - ipz_rc, device); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit3; - } - - for (counter = 0; counter < param.act_pages; counter++) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if (!vpage) { - ehca_err(device, "ipz_qpageit_get_inc() " - "returns NULL device=%p", device); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_cq(adapter_handle, - my_cq->ipz_cq_handle, - &my_cq->pf, - 0, - 0, - rpage, - 1, - my_cq->galpas. - kernel); - - if (h_ret < H_SUCCESS) { - ehca_err(device, "hipz_h_register_rpage_cq() failed " - "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " - "act_pages=%i", my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit4; - } - - if (counter == (param.act_pages - 1)) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if ((h_ret != H_SUCCESS) || vpage) { - ehca_err(device, "Registration of pages not " - "complete ehca_cq=%p cq_num=%x " - "h_ret=%lli", my_cq, my_cq->cq_number, - h_ret); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%lli " - "counter=%i act_pages=%i", - my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-ENOMEM); - goto create_cq_exit4; - } - } - } - - ipz_qeit_reset(&my_cq->ipz_queue); - - gal = my_cq->galpas.kernel; - cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); - ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", - my_cq, my_cq->cq_number, cqx_fec); - - my_cq->ib_cq.cqe = my_cq->nr_of_entries = - param.act_nr_of_entries - additional_cqe; - my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; - - for (i = 0; i < QP_HASHTAB_LEN; i++) - INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); - - INIT_LIST_HEAD(&my_cq->sqp_err_list); - INIT_LIST_HEAD(&my_cq->rqp_err_list); - - if (context) { - struct ipz_queue *ipz_queue = &my_cq->ipz_queue; - struct ehca_create_cq_resp resp; - memset(&resp, 0, sizeof(resp)); - resp.cq_number = my_cq->cq_number; - resp.token = my_cq->token; - resp.ipz_queue.qe_size = ipz_queue->qe_size; - resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; - resp.ipz_queue.queue_length = ipz_queue->queue_length; - resp.ipz_queue.pagesize = ipz_queue->pagesize; - resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - resp.fw_handle_ofs = (u32) - (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - ehca_err(device, "Copy to udata failed."); - cq = ERR_PTR(-EFAULT); - goto create_cq_exit4; - } - } - - return cq; - -create_cq_exit4: - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - -create_cq_exit3: - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret != H_SUCCESS) - ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " - "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); - -create_cq_exit2: - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - -create_cq_exit1: - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return cq; -} - -int ehca_destroy_cq(struct ib_cq *cq) -{ - u64 h_ret; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int cq_num = my_cq->cq_number; - struct ib_device *device = cq->device; - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - unsigned long flags; - - if (cq->uobject) { - if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { - ehca_err(device, "Resources still referenced in " - "user space cq_num=%x", my_cq->cq_number); - return -EINVAL; - } - } - - /* - * remove the CQ from the idr first to make sure - * no more interrupt tasklets will touch this CQ - */ - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - - /* now wait until all pending events have completed */ - wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); - - /* nobody's using our CQ any longer -- we can destroy it */ - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); - if (h_ret == H_R_STATE) { - /* cq in err: read err data and destroy it forcibly */ - ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " - "state. Try to delete it forcibly.", - my_cq, cq_num, my_cq->ipz_cq_handle.handle); - ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret == H_SUCCESS) - ehca_dbg(device, "cq_num=%x deleted successfully.", - cq_num); - } - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " - "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); - return ehca2ib_return_code(h_ret); - } - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return 0; -} - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - /* TODO: proper resize needs to be done */ - ehca_err(cq->device, "not implemented yet"); - - return -EFAULT; -} - -int ehca_init_cq_cache(void) -{ - cq_cache = kmem_cache_create("ehca_cache_cq", - sizeof(struct ehca_cq), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!cq_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_cq_cache(void) -{ - if (cq_cache) - kmem_cache_destroy(cq_cache); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_eq.c b/kernel/drivers/infiniband/hw/ehca/ehca_eq.c deleted file mode 100644 index 90da6747d..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_eq.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Event queue handling - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Khadija Souissi <souissi@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_qes.h" -#include "hcp_if.h" -#include "ipz_pt_fn.h" - -int ehca_create_eq(struct ehca_shca *shca, - struct ehca_eq *eq, - const enum ehca_eq_type type, const u32 length) -{ - int ret; - u64 h_ret; - u32 nr_pages; - u32 i; - void *vpage; - struct ib_device *ib_dev = &shca->ib_device; - - spin_lock_init(&eq->spinlock); - spin_lock_init(&eq->irq_spinlock); - eq->is_initialized = 0; - - if (type != EHCA_EQ && type != EHCA_NEQ) { - ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); - return -EINVAL; - } - if (!length) { - ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); - return -EINVAL; - } - - h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); - - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); - return -EINVAL; - } - - ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, - EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); - if (!ret) { - ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); - goto create_eq_exit1; - } - - for (i = 0; i < nr_pages; i++) { - u64 rpage; - - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) - goto create_eq_exit2; - - rpage = __pa(vpage); - h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); - - if (i == (nr_pages - 1)) { - /* last page */ - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (h_ret != H_SUCCESS || vpage) - goto create_eq_exit2; - } else { - if (h_ret != H_PAGE_REGISTERED) - goto create_eq_exit2; - } - } - - ipz_qeit_reset(&eq->ipz_queue); - - /* register interrupt handlers and initialize work queues */ - if (type == EHCA_EQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - 0, "ehca_eq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't map interrupt handler."); - } else if (type == EHCA_NEQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - 0, "ehca_neq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't map interrupt handler."); - } - - eq->is_initialized = 1; - - return 0; - -create_eq_exit2: - ipz_queue_dtor(NULL, &eq->ipz_queue); - -create_eq_exit1: - hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - return -EINVAL; -} - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - void *eqe; - - spin_lock_irqsave(&eq->spinlock, flags); - eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); - spin_unlock_irqrestore(&eq->spinlock, flags); - - return eqe; -} - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - u64 h_ret; - - ibmebus_free_irq(eq->ist, (void *)shca); - - spin_lock_irqsave(&shca_list_lock, flags); - eq->is_initialized = 0; - spin_unlock_irqrestore(&shca_list_lock, flags); - - tasklet_kill(&eq->interrupt_task); - - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't free EQ resources."); - return -EINVAL; - } - ipz_queue_dtor(NULL, &eq->ipz_queue); - - return 0; -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_hca.c b/kernel/drivers/infiniband/hw/ehca/ehca_hca.c deleted file mode 100644 index 9ed4d2588..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_hca.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HCA query functions - * - * Authors: Heiko J Schick <schickhj@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/gfp.h> - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static unsigned int limit_uint(unsigned int value) -{ - return min_t(unsigned int, value, INT_MAX); -} - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) -{ - int i, ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_hca *rblock; - - static const u32 cap_mapping[] = { - IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, - IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, - IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, - IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, - IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, - IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, - IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, - IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, - IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, - IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, - IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, - }; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto query_device1; - } - - memset(props, 0, sizeof(struct ib_device_attr)); - props->page_size_cap = shca->hca_cap_mr_pgsize; - props->fw_ver = rblock->hw_ver; - props->max_mr_size = rblock->max_mr_size; - props->vendor_id = rblock->vendor_id >> 8; - props->vendor_part_id = rblock->vendor_part_id >> 16; - props->hw_ver = rblock->hw_ver; - props->max_qp = limit_uint(rblock->max_qp); - props->max_qp_wr = limit_uint(rblock->max_wqes_wq); - props->max_sge = limit_uint(rblock->max_sge); - props->max_sge_rd = limit_uint(rblock->max_sge_rd); - props->max_cq = limit_uint(rblock->max_cq); - props->max_cqe = limit_uint(rblock->max_cqe); - props->max_mr = limit_uint(rblock->max_mr); - props->max_mw = limit_uint(rblock->max_mw); - props->max_pd = limit_uint(rblock->max_pd); - props->max_ah = limit_uint(rblock->max_ah); - props->max_ee = limit_uint(rblock->max_rd_ee_context); - props->max_rdd = limit_uint(rblock->max_rd_domain); - props->max_fmr = limit_uint(rblock->max_mr); - props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); - props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); - props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); - props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); - props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - props->max_srq = limit_uint(props->max_qp); - props->max_srq_wr = limit_uint(props->max_qp_wr); - props->max_srq_sge = 3; - } - - props->max_pkeys = 16; - /* Some FW versions say 0 here; insert sensible value in that case */ - props->local_ca_ack_delay = rblock->local_ca_ack_delay ? - min_t(u8, rblock->local_ca_ack_delay, 255) : 12; - props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); - props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); - props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); - props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); - props->max_total_mcast_qp_attach - = limit_uint(rblock->max_total_mcast_qp_attach); - - /* translate device capabilities */ - props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; - for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) - if (rblock->hca_cap_indicators & cap_mapping[i + 1]) - props->device_cap_flags |= cap_mapping[i]; - -query_device1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) -{ - switch (fw_mtu) { - case 0x1: - return IB_MTU_256; - case 0x2: - return IB_MTU_512; - case 0x3: - return IB_MTU_1024; - case 0x4: - return IB_MTU_2048; - case 0x5: - return IB_MTU_4096; - default: - ehca_err(&shca->ib_device, "Unknown MTU size: %x.", - fw_mtu); - return 0; - } -} - -static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) -{ - switch (vl_cap) { - case 0x1: - return 1; - case 0x2: - return 2; - case 0x3: - return 4; - case 0x4: - return 8; - case 0x5: - return 15; - default: - ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", - vl_cap); - return 0; - } -} - -int ehca_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_port1; - } - - memset(props, 0, sizeof(struct ib_port_attr)); - - props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); - props->port_cap_flags = rblock->capability_mask; - props->gid_tbl_len = rblock->gid_tbl_len; - if (rblock->max_msg_sz) - props->max_msg_sz = rblock->max_msg_sz; - else - props->max_msg_sz = 0x1 << 31; - props->bad_pkey_cntr = rblock->bad_pkey_cntr; - props->qkey_viol_cntr = rblock->qkey_viol_cntr; - props->pkey_tbl_len = rblock->pkey_tbl_len; - props->lid = rblock->lid; - props->sm_lid = rblock->sm_lid; - props->lmc = rblock->lmc; - props->sm_sl = rblock->sm_sl; - props->subnet_timeout = rblock->subnet_timeout; - props->init_type_reply = rblock->init_type_reply; - props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); - - if (rblock->state && rblock->phys_width) { - props->phys_state = rblock->phys_pstate; - props->state = rblock->phys_state; - props->active_width = rblock->phys_width; - props->active_speed = rblock->phys_speed; - } else { - /* old firmware releases don't report physical - * port info, so use default values - */ - props->phys_state = 5; - props->state = rblock->state; - props->active_width = IB_WIDTH_12X; - props->active_speed = IB_SPEED_SDR; - } - -query_port1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_sma_attr(struct ehca_shca *shca, - u8 port, struct ehca_sma_attr *attr) -{ - int ret = 0; - u64 h_ret; - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_sma_attr1; - } - - memset(attr, 0, sizeof(struct ehca_sma_attr)); - - attr->lid = rblock->lid; - attr->lmc = rblock->lmc; - attr->sm_sl = rblock->sm_sl; - attr->sm_lid = rblock->sm_lid; - - attr->pkey_tbl_len = rblock->pkey_tbl_len; - memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); - -query_sma_attr1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if (index > 16) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_pkey1; - } - - memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); - -query_pkey1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - if (index < 0 || index > 255) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_gid1; - } - - memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); - memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); - -query_gid1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static const u32 allowed_port_caps = ( - IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP); - -int ehca_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - int ret = 0; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - u32 cap; - u64 hret; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if ((props->set_port_cap_mask | props->clr_port_cap_mask) - & ~allowed_port_caps) { - ehca_err(&shca->ib_device, "Non-changeable bits set in masks " - "set=%x clr=%x allowed=%x", props->set_port_cap_mask, - props->clr_port_cap_mask, allowed_port_caps); - return -EINVAL; - } - - if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - ret = -ENOMEM; - goto modify_port1; - } - - hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto modify_port2; - } - - cap = (rblock->capability_mask | props->set_port_cap_mask) - & ~props->clr_port_cap_mask; - - hret = hipz_h_modify_port(shca->ipz_hca_handle, port, - cap, props->init_type, port_modify_mask); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", - hret); - ret = -EINVAL; - } - -modify_port2: - ehca_free_fw_ctrlblock(rblock); - -modify_port1: - mutex_unlock(&shca->modify_mutex); - - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_irq.c b/kernel/drivers/infiniband/hw/ehca/ehca_irq.c deleted file mode 100644 index 8615d7cf7..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_irq.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Functions for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick <schickhj@de.ibm.com> - * Khadija Souissi <souissi@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Joachim Fenkes <fenkes@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> -#include <linux/smpboot.h> - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) -#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) - -static void queue_comp_task(struct ehca_cq *__cq); - -static struct ehca_comp_pool *pool; - -static inline void comp_event_callback(struct ehca_cq *cq) -{ - if (!cq->ib_cq.comp_handler) - return; - - spin_lock(&cq->cb_lock); - cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); - spin_unlock(&cq->cb_lock); - - return; -} - -static void print_error_data(struct ehca_shca *shca, void *data, - u64 *rblock, int length) -{ - u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); - u64 resource = rblock[1]; - - switch (type) { - case 0x1: /* Queue Pair */ - { - struct ehca_qp *qp = (struct ehca_qp *)data; - - /* only print error data if AER is set */ - if (rblock[6] == 0) - return; - - ehca_err(&shca->ib_device, - "QP 0x%x (resource=%llx) has errors.", - qp->ib_qp.qp_num, resource); - break; - } - case 0x4: /* Completion Queue */ - { - struct ehca_cq *cq = (struct ehca_cq *)data; - - ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%llx) has errors.", - cq->cq_number, resource); - break; - } - default: - ehca_err(&shca->ib_device, - "Unknown error type: %llx on %s.", - type, shca->ib_device.name); - break; - } - - ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data begin " - "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%llx", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data end " - "----------------------------------------------------"); - - return; -} - -int ehca_error_data(struct ehca_shca *shca, void *data, - u64 resource) -{ - - unsigned long ret; - u64 *rblock; - unsigned long block_count; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); - ret = -ENOMEM; - goto error_data1; - } - - /* rblock must be 4K aligned and should be 4K large */ - ret = hipz_h_error_data(shca->ipz_hca_handle, - resource, - rblock, - &block_count); - - if (ret == H_R_STATE) - ehca_err(&shca->ib_device, - "No error data is available: %llx.", resource); - else if (ret == H_SUCCESS) { - int length; - - length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); - - if (length > EHCA_PAGESIZE) - length = EHCA_PAGESIZE; - - print_error_data(shca, data, rblock, length); - } else - ehca_err(&shca->ib_device, - "Error data could not be fetched: %llx", resource); - - ehca_free_fw_ctrlblock(rblock); - -error_data1: - return ret; - -} - -static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, - enum ib_event_type event_type) -{ - struct ib_event event; - - /* PATH_MIG without the QP ever having been armed is false alarm */ - if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) - return; - - event.device = &shca->ib_device; - event.event = event_type; - - if (qp->ext_type == EQPT_SRQ) { - if (!qp->ib_srq.event_handler) - return; - - event.element.srq = &qp->ib_srq; - qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); - } else { - if (!qp->ib_qp.event_handler) - return; - - event.element.qp = &qp->ib_qp; - qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); - } -} - -static void qp_event_callback(struct ehca_shca *shca, u64 eqe, - enum ib_event_type event_type, int fatal) -{ - struct ehca_qp *qp; - u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, token); - if (qp) - atomic_inc(&qp->nr_events); - read_unlock(&ehca_qp_idr_lock); - - if (!qp) - return; - - if (fatal) - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - - dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? - IB_EVENT_SRQ_ERR : event_type); - - /* - * eHCA only processes one WQE at a time for SRQ base QPs, - * so the last WQE has been processed as soon as the QP enters - * error state. - */ - if (fatal && qp->ext_type == EQPT_SRQBASE) - dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); - - if (atomic_dec_and_test(&qp->nr_events)) - wake_up(&qp->wait_completion); - return; -} - -static void cq_event_callback(struct ehca_shca *shca, - u64 eqe) -{ - struct ehca_cq *cq; - u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - - if (!cq) - return; - - ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); - - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - return; -} - -static void parse_identifier(struct ehca_shca *shca, u64 eqe) -{ - u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); - - switch (identifier) { - case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); - break; - case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); - break; - case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); - break; - case 0x05: /* QP error */ - case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); - break; - case 0x07: /* CQ error */ - case 0x08: /* CQ error */ - cq_event_callback(shca, eqe); - break; - case 0x09: /* MRMWPTE error */ - ehca_err(&shca->ib_device, "MRMWPTE error."); - break; - case 0x0A: /* port event */ - ehca_err(&shca->ib_device, "Port event."); - break; - case 0x0B: /* MR access error */ - ehca_err(&shca->ib_device, "MR access error."); - break; - case 0x0C: /* EQ error */ - ehca_err(&shca->ib_device, "EQ error."); - break; - case 0x0D: /* P/Q_Key mismatch */ - ehca_err(&shca->ib_device, "P/Q_Key mismatch."); - break; - case 0x10: /* sampling complete */ - ehca_err(&shca->ib_device, "Sampling complete."); - break; - case 0x11: /* unaffiliated access error */ - ehca_err(&shca->ib_device, "Unaffiliated access error."); - break; - case 0x12: /* path migrating */ - ehca_err(&shca->ib_device, "Path migrating."); - break; - case 0x13: /* interface trace stopped */ - ehca_err(&shca->ib_device, "Interface trace stopped."); - break; - case 0x14: /* first error capture info available */ - ehca_info(&shca->ib_device, "First error capture available"); - break; - case 0x15: /* SRQ limit reached */ - qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); - break; - default: - ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", - identifier, shca->ib_device.name); - break; - } - - return; -} - -static void dispatch_port_event(struct ehca_shca *shca, int port_num, - enum ib_event_type type, const char *msg) -{ - struct ib_event event; - - ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); - event.device = &shca->ib_device; - event.event = type; - event.element.port_num = port_num; - ib_dispatch_event(&event); -} - -static void notify_port_conf_change(struct ehca_shca *shca, int port_num) -{ - struct ehca_sma_attr new_attr; - struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; - - ehca_query_sma_attr(shca, port_num, &new_attr); - - if (new_attr.sm_sl != old_attr->sm_sl || - new_attr.sm_lid != old_attr->sm_lid) - dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, - "SM changed"); - - if (new_attr.lid != old_attr->lid || - new_attr.lmc != old_attr->lmc) - dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, - "LID changed"); - - if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || - memcmp(new_attr.pkeys, old_attr->pkeys, - sizeof(u16) * new_attr.pkey_tbl_len)) - dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, - "P_Key changed"); - - *old_attr = new_attr; -} - -/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ -static int replay_modify_qp(struct ehca_sport *sport) -{ - int aqp1_destroyed; - unsigned long flags; - - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - - aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; - - if (sport->ibqp_sqp[IB_QPT_SMI]) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); - if (!aqp1_destroyed) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); - - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - - return aqp1_destroyed; -} - -static void parse_ec(struct ehca_shca *shca, u64 eqe) -{ - u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); - u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); - u8 spec_event; - struct ehca_sport *sport = &shca->sport[port - 1]; - - switch (ec) { - case 0x30: /* port availability change */ - if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - /* only replay modify_qp calls in autodetect mode; - * if AQP1 was destroyed, the port is already down - * again and we can drop the event. - */ - if (ehca_nr_ports < 0) - if (replay_modify_qp(sport)) - break; - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, &sport->saved_attr); - } else { - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - } - break; - case 0x31: - /* port configuration change - * disruptive change is caused by - * LID, PKEY or SM change - */ - if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { - ehca_warn(&shca->ib_device, "disruptive port " - "%d configuration change", port); - - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, - &sport->saved_attr); - } else - notify_port_conf_change(shca, port); - break; - case 0x32: /* adapter malfunction */ - ehca_err(&shca->ib_device, "Adapter malfunction."); - break; - case 0x33: /* trace stopped */ - ehca_err(&shca->ib_device, "Traced stopped."); - break; - case 0x34: /* util async event */ - spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); - if (spec_event == 0x80) /* client reregister required */ - dispatch_port_event(shca, port, - IB_EVENT_CLIENT_REREGISTER, - "client reregister req."); - else - ehca_warn(&shca->ib_device, "Unknown util async " - "event %x on port %x", spec_event, port); - break; - default: - ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", - ec, shca->ib_device.name); - break; - } - - return; -} - -static inline void reset_eq_pending(struct ehca_cq *cq) -{ - u64 CQx_EP; - struct h_galpa gal = cq->galpas.kernel; - - hipz_galpa_store_cq(gal, cqx_ep, 0x0); - CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); - - return; -} - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - tasklet_hi_schedule(&shca->neq.interrupt_task); - - return IRQ_HANDLED; -} - -void ehca_tasklet_neq(unsigned long data) -{ - struct ehca_shca *shca = (struct ehca_shca*)data; - struct ehca_eqe *eqe; - u64 ret; - - eqe = ehca_poll_eq(shca, &shca->neq); - - while (eqe) { - if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) - parse_ec(shca, eqe->entry); - - eqe = ehca_poll_eq(shca, &shca->neq); - } - - ret = hipz_h_reset_event(shca->ipz_hca_handle, - shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); - - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, "Can't clear notification events."); - - return; -} - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - tasklet_hi_schedule(&shca->eq.interrupt_task); - - return IRQ_HANDLED; -} - - -static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) -{ - u64 eqe_value; - u32 token; - struct ehca_cq *cq; - - eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - ehca_dbg(&shca->ib_device, "Got completion event"); - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (cq == NULL) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq token=%x", - token); - return; - } - reset_eq_pending(cq); - if (ehca_scaling_code) - queue_comp_task(cq); - else { - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eqe_value); - } -} - -void ehca_process_eq(struct ehca_shca *shca, int is_irq) -{ - struct ehca_eq *eq = &shca->eq; - struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value, ret; - int eqe_cnt, i; - int eq_empty = 0; - - spin_lock(&eq->irq_spinlock); - if (is_irq) { - const int max_query_cnt = 100; - int query_cnt = 0; - int int_state = 1; - do { - int_state = hipz_h_query_int_state( - shca->ipz_hca_handle, eq->ist); - query_cnt++; - iosync(); - } while (int_state && query_cnt < max_query_cnt); - if (unlikely((query_cnt == max_query_cnt))) - ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", - int_state, query_cnt); - } - - /* read out all eqes */ - eqe_cnt = 0; - do { - u32 token; - eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); - if (!eqe_cache[eqe_cnt].eqe) - break; - eqe_value = eqe_cache[eqe_cnt].eqe->entry; - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); - if (eqe_cache[eqe_cnt].cq) - atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (!eqe_cache[eqe_cnt].cq) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq " - "token=%x", token); - continue; - } - } else - eqe_cache[eqe_cnt].cq = NULL; - eqe_cnt++; - } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); - if (!eqe_cnt) { - if (is_irq) - ehca_dbg(&shca->ib_device, - "No eqe found for irq event"); - goto unlock_irq_spinlock; - } else if (!is_irq) { - ret = hipz_h_eoi(eq->ist); - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, - "bad return code EOI -rc = %lld\n", ret); - ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); - } - if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) - ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); - /* enable irq for new packets */ - for (i = 0; i < eqe_cnt; i++) { - if (eq->eqe_cache[i].cq) - reset_eq_pending(eq->eqe_cache[i].cq); - } - /* check eq */ - spin_lock(&eq->spinlock); - eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); - spin_unlock(&eq->spinlock); - /* call completion handler for cached eqes */ - for (i = 0; i < eqe_cnt; i++) - if (eq->eqe_cache[i].cq) { - if (ehca_scaling_code) - queue_comp_task(eq->eqe_cache[i].cq); - else { - struct ehca_cq *cq = eq->eqe_cache[i].cq; - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eq->eqe_cache[i].eqe->entry); - } - /* poll eq if not empty */ - if (eq_empty) - goto unlock_irq_spinlock; - do { - struct ehca_eqe *eqe; - eqe = ehca_poll_eq(shca, &shca->eq); - if (!eqe) - break; - process_eqe(shca, eqe); - } while (1); - -unlock_irq_spinlock: - spin_unlock(&eq->irq_spinlock); -} - -void ehca_tasklet_eq(unsigned long data) -{ - ehca_process_eq((struct ehca_shca*)data, 1); -} - -static int find_next_online_cpu(struct ehca_comp_pool *pool) -{ - int cpu; - unsigned long flags; - - WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level >= 3) - ehca_dmp(cpu_online_mask, cpumask_size(), ""); - - spin_lock_irqsave(&pool->last_cpu_lock, flags); - do { - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; - } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); - spin_unlock_irqrestore(&pool->last_cpu_lock, flags); - - return cpu; -} - -static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct, - struct task_struct *thread) -{ - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); - spin_lock(&__cq->task_lock); - - if (__cq->nr_callbacks == 0) { - __cq->nr_callbacks++; - list_add_tail(&__cq->entry, &cct->cq_list); - cct->cq_jobs++; - wake_up_process(thread); - } else - __cq->nr_callbacks++; - - spin_unlock(&__cq->task_lock); - spin_unlock_irqrestore(&cct->task_lock, flags); -} - -static void queue_comp_task(struct ehca_cq *__cq) -{ - int cpu_id; - struct ehca_cpu_comp_task *cct; - struct task_struct *thread; - int cq_jobs; - unsigned long flags; - - cpu_id = find_next_online_cpu(pool); - BUG_ON(!cpu_online(cpu_id)); - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - - spin_lock_irqsave(&cct->task_lock, flags); - cq_jobs = cct->cq_jobs; - spin_unlock_irqrestore(&cct->task_lock, flags); - if (cq_jobs > 0) { - cpu_id = find_next_online_cpu(pool); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - } - __queue_comp_task(__cq, cct, thread); -} - -static void run_comp_task(struct ehca_cpu_comp_task *cct) -{ - struct ehca_cq *cq; - - while (!list_empty(&cct->cq_list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irq(&cct->task_lock); - - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - spin_lock_irq(&cct->task_lock); - spin_lock(&cq->task_lock); - cq->nr_callbacks--; - if (!cq->nr_callbacks) { - list_del_init(cct->cq_list.next); - cct->cq_jobs--; - } - spin_unlock(&cq->task_lock); - } -} - -static void comp_task_park(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - struct ehca_cpu_comp_task *target; - struct task_struct *thread; - struct ehca_cq *cq, *tmp; - LIST_HEAD(list); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 0; - cct->active = 0; - list_splice_init(&cct->cq_list, &list); - spin_unlock_irq(&cct->task_lock); - - cpu = find_next_online_cpu(pool); - target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); - spin_lock_irq(&target->task_lock); - list_for_each_entry_safe(cq, tmp, &list, entry) { - list_del(&cq->entry); - __queue_comp_task(cq, target, thread); - } - spin_unlock_irq(&target->task_lock); -} - -static void comp_task_stop(unsigned int cpu, bool online) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 0; - cct->active = 0; - WARN_ON(!list_empty(&cct->cq_list)); - spin_unlock_irq(&cct->task_lock); -} - -static int comp_task_should_run(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - return cct->cq_jobs; -} - -static void comp_task(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); - int cql_empty; - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - if (!cql_empty) { - __set_current_state(TASK_RUNNING); - run_comp_task(cct); - } - spin_unlock_irq(&cct->task_lock); -} - -static struct smp_hotplug_thread comp_pool_threads = { - .thread_should_run = comp_task_should_run, - .thread_fn = comp_task, - .thread_comm = "ehca_comp/%u", - .cleanup = comp_task_stop, - .park = comp_task_park, -}; - -int ehca_create_comp_pool(void) -{ - int cpu, ret = -ENOMEM; - - if (!ehca_scaling_code) - return 0; - - pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); - if (pool == NULL) - return -ENOMEM; - - spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = cpumask_any(cpu_online_mask); - - pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (!pool->cpu_comp_tasks) - goto out_pool; - - pool->cpu_comp_threads = alloc_percpu(struct task_struct *); - if (!pool->cpu_comp_threads) - goto out_tasks; - - for_each_present_cpu(cpu) { - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - } - - comp_pool_threads.store = pool->cpu_comp_threads; - ret = smpboot_register_percpu_thread(&comp_pool_threads); - if (ret) - goto out_threads; - - pr_info("eHCA scaling code enabled\n"); - return ret; - -out_threads: - free_percpu(pool->cpu_comp_threads); -out_tasks: - free_percpu(pool->cpu_comp_tasks); -out_pool: - kfree(pool); - return ret; -} - -void ehca_destroy_comp_pool(void) -{ - if (!ehca_scaling_code) - return; - - smpboot_unregister_percpu_thread(&comp_pool_threads); - - free_percpu(pool->cpu_comp_threads); - free_percpu(pool->cpu_comp_tasks); - kfree(pool); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_irq.h b/kernel/drivers/infiniband/hw/ehca/ehca_irq.h deleted file mode 100644 index 5370199f0..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_irq.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions and structs for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick <schickhj@de.ibm.com> - * Khadija Souissi <souissi@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_IRQ_H -#define __EHCA_IRQ_H - - -struct ehca_shca; - -#include <linux/interrupt.h> -#include <linux/types.h> - -int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); -void ehca_tasklet_neq(unsigned long data); - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); -void ehca_tasklet_eq(unsigned long data); -void ehca_process_eq(struct ehca_shca *shca, int is_irq); - -struct ehca_cpu_comp_task { - struct list_head cq_list; - spinlock_t task_lock; - int cq_jobs; - int active; -}; - -struct ehca_comp_pool { - struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; - struct task_struct * __percpu *cpu_comp_threads; - int last_cpu; - spinlock_t last_cpu_lock; -}; - -int ehca_create_comp_pool(void); -void ehca_destroy_comp_pool(void); - -#endif diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_iverbs.h b/kernel/drivers/infiniband/hw/ehca/ehca_iverbs.h deleted file mode 100644 index 22f79afa7..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions for internal functions - * - * Authors: Heiko J Schick <schickhj@de.ibm.com> - * Dietmar Decker <ddecker@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_IVERBS_H__ -#define __EHCA_IVERBS_H__ - -#include "ehca_classes.h" - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); - -int ehca_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props); - -int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, - struct ehca_sma_attr *attr); - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); - -int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid); - -int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, - struct ib_port_modify *props); - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_dealloc_pd(struct ib_pd *pd); - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_destroy_ah(struct ib_ah *ah); - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, u64 *iova_start); - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata); - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, int mr_access_flags, u64 *iova_start); - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - -int ehca_dereg_mr(struct ib_mr *mr); - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); - -int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); - -int ehca_dealloc_mw(struct ib_mw *mw); - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, u64 iova); - -int ehca_unmap_fmr(struct list_head *fmr_list); - -int ehca_dealloc_fmr(struct ib_fmr *fmr); - -enum ehca_eq_type { - EHCA_EQ = 0, /* Event Queue */ - EHCA_NEQ /* Notification Event Queue */ -}; - -int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, - enum ehca_eq_type type, const u32 length); - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); - - -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_destroy_cq(struct ib_cq *cq); - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); - -int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_destroy_qp(struct ib_qp *qp); - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata); - -int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - -int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); - -int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - -int ehca_destroy_srq(struct ib_srq *srq); - -u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, - struct ib_qp_init_attr *qp_init_attr); - -int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata); - -int ehca_dealloc_ucontext(struct ib_ucontext *context); - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); - -void ehca_poll_eqs(unsigned long data); - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd); - -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); - -#ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(gfp_t flags); -void ehca_free_fw_ctrlblock(void *ptr); -#else -#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) -#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) -#endif - -void ehca_recover_sqp(struct ib_qp *sqp); - -#endif diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_main.c b/kernel/drivers/infiniband/hw/ehca/ehca_main.c deleted file mode 100644 index cd8d290a0..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_main.c +++ /dev/null @@ -1,1100 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * module start stop, hca detection - * - * Authors: Heiko J Schick <schickhj@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Joachim Fenkes <fenkes@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef CONFIG_PPC_64K_PAGES -#include <linux/slab.h> -#endif - -#include <linux/notifier.h> -#include <linux/memory.h> -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -#define HCAD_VERSION "0029" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); -MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION(HCAD_VERSION); - -static bool ehca_open_aqp1 = 0; -static int ehca_hw_level = 0; -static bool ehca_poll_all_eqs = 1; - -int ehca_debug_level = 0; -int ehca_nr_ports = -1; -bool ehca_use_hp_mr = 0; -int ehca_port_act_time = 30; -int ehca_static_rate = -1; -bool ehca_scaling_code = 0; -int ehca_lock_hcalls = -1; -int ehca_max_cq = -1; -int ehca_max_qp = -1; - -module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); -module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); -module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); -module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); -module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); -module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); -module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); -module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); -module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); -module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); -module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); - -MODULE_PARM_DESC(open_aqp1, - "Open AQP1 on startup (default: no)"); -MODULE_PARM_DESC(debug_level, - "Amount of debug output (0: none (default), 1: traces, " - "2: some dumps, 3: lots)"); -MODULE_PARM_DESC(hw_level, - "Hardware level (0: autosensing (default), " - "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); -MODULE_PARM_DESC(nr_ports, - "number of connected ports (-1: autodetect (default), " - "1: port one only, 2: two ports)"); -MODULE_PARM_DESC(use_hp_mr, - "Use high performance MRs (default: no)"); -MODULE_PARM_DESC(port_act_time, - "Time to wait for port activation (default: 30 sec)"); -MODULE_PARM_DESC(poll_all_eqs, - "Poll all event queues periodically (default: yes)"); -MODULE_PARM_DESC(static_rate, - "Set permanent static rate (default: no static rate)"); -MODULE_PARM_DESC(scaling_code, - "Enable scaling code (default: no)"); -MODULE_PARM_DESC(lock_hcalls, - "Serialize all hCalls made by the driver " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_cqs, - "Max number of CQs which can be allocated " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_qps, - "Max number of QPs which can be allocated " - "(default: autodetect)"); - -DEFINE_RWLOCK(ehca_qp_idr_lock); -DEFINE_RWLOCK(ehca_cq_idr_lock); -DEFINE_IDR(ehca_qp_idr); -DEFINE_IDR(ehca_cq_idr); - -static LIST_HEAD(shca_list); /* list of all registered ehcas */ -DEFINE_SPINLOCK(shca_list_lock); - -static struct timer_list poll_eqs_timer; - -#ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache; - -void *ehca_alloc_fw_ctrlblock(gfp_t flags) -{ - void *ret = kmem_cache_zalloc(ctblk_cache, flags); - if (!ret) - ehca_gen_err("Out of memory for ctblk"); - return ret; -} - -void ehca_free_fw_ctrlblock(void *ptr) -{ - if (ptr) - kmem_cache_free(ctblk_cache, ptr); - -} -#endif - -int ehca2ib_return_code(u64 ehca_rc) -{ - switch (ehca_rc) { - case H_SUCCESS: - return 0; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: - return -EBUSY; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - default: - return -EINVAL; - } -} - -static int ehca_create_slab_caches(void) -{ - int ret; - - ret = ehca_init_pd_cache(); - if (ret) { - ehca_gen_err("Cannot create PD SLAB cache."); - return ret; - } - - ret = ehca_init_cq_cache(); - if (ret) { - ehca_gen_err("Cannot create CQ SLAB cache."); - goto create_slab_caches2; - } - - ret = ehca_init_qp_cache(); - if (ret) { - ehca_gen_err("Cannot create QP SLAB cache."); - goto create_slab_caches3; - } - - ret = ehca_init_av_cache(); - if (ret) { - ehca_gen_err("Cannot create AV SLAB cache."); - goto create_slab_caches4; - } - - ret = ehca_init_mrmw_cache(); - if (ret) { - ehca_gen_err("Cannot create MR&MW SLAB cache."); - goto create_slab_caches5; - } - - ret = ehca_init_small_qp_cache(); - if (ret) { - ehca_gen_err("Cannot create small queue SLAB cache."); - goto create_slab_caches6; - } - -#ifdef CONFIG_PPC_64K_PAGES - ctblk_cache = kmem_cache_create("ehca_cache_ctblk", - EHCA_PAGESIZE, H_CB_ALIGNMENT, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ctblk_cache) { - ehca_gen_err("Cannot create ctblk SLAB cache."); - ehca_cleanup_small_qp_cache(); - ret = -ENOMEM; - goto create_slab_caches6; - } -#endif - return 0; - -create_slab_caches6: - ehca_cleanup_mrmw_cache(); - -create_slab_caches5: - ehca_cleanup_av_cache(); - -create_slab_caches4: - ehca_cleanup_qp_cache(); - -create_slab_caches3: - ehca_cleanup_cq_cache(); - -create_slab_caches2: - ehca_cleanup_pd_cache(); - - return ret; -} - -static void ehca_destroy_slab_caches(void) -{ - ehca_cleanup_small_qp_cache(); - ehca_cleanup_mrmw_cache(); - ehca_cleanup_av_cache(); - ehca_cleanup_qp_cache(); - ehca_cleanup_cq_cache(); - ehca_cleanup_pd_cache(); -#ifdef CONFIG_PPC_64K_PAGES - if (ctblk_cache) - kmem_cache_destroy(ctblk_cache); -#endif -} - -#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) -#define EHCA_REVID EHCA_BMASK_IBM(40, 63) - -static struct cap_descr { - u64 mask; - char *descr; -} hca_cap_descr[] = { - { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, - { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, - { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, - { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, - { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, - { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, - { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, - { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, - { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, - { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, - { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, - { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, - { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, - { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, - { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, - { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, - { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, - { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, -}; - -static int ehca_sense_attributes(struct ehca_shca *shca) -{ - int i, ret = 0; - u64 h_ret; - struct hipz_query_hca *rblock; - struct hipz_query_port *port; - const char *loc_code; - - static const u32 pgsize_map[] = { - HCA_CAP_MR_PGSIZE_4K, 0x1000, - HCA_CAP_MR_PGSIZE_64K, 0x10000, - HCA_CAP_MR_PGSIZE_1M, 0x100000, - HCA_CAP_MR_PGSIZE_16M, 0x1000000, - }; - - ehca_gen_dbg("Probing adapter %s...", - shca->ofdev->dev.of_node->full_name); - loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", - NULL); - if (loc_code) - ehca_gen_dbg(" ... location lode=%s", loc_code); - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_gen_err("Cannot allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query device properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - if (ehca_nr_ports == 1) - shca->num_ports = 1; - else - shca->num_ports = (u8)rblock->num_ports; - - ehca_gen_dbg(" ... found %x ports", rblock->num_ports); - - if (ehca_hw_level == 0) { - u32 hcaaver; - u32 revid; - - hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); - revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); - - ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); - - if (hcaaver == 1) { - if (revid <= 3) - shca->hw_level = 0x10 | (revid + 1); - else - shca->hw_level = 0x14; - } else if (hcaaver == 2) { - if (revid == 0) - shca->hw_level = 0x21; - else if (revid == 0x10) - shca->hw_level = 0x22; - else if (revid == 0x20 || revid == 0x21) - shca->hw_level = 0x23; - } - - if (!shca->hw_level) { - ehca_gen_warn("unknown hardware version" - " - assuming default level"); - shca->hw_level = 0x22; - } - } else - shca->hw_level = ehca_hw_level; - ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - - shca->hca_cap = rblock->hca_cap_indicators; - ehca_gen_dbg(" ... HCA capabilities:"); - for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) - if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) - ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - - /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is - * a firmware property, so it's valid across all adapters - */ - if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, - shca->hca_cap); - - /* translate supported MR page sizes; always support 4K */ - shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; - for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) - if (rblock->memory_page_size_supported & pgsize_map[i]) - shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; - - /* Set maximum number of CQs and QPs to calculate EQ size */ - if (shca->max_num_qps == -1) - shca->max_num_qps = min_t(int, rblock->max_qp, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { - ehca_gen_warn("The requested number of QPs is out of range " - "(1 - %i) specified by HW. Value is set to %i", - rblock->max_qp, rblock->max_qp); - shca->max_num_qps = rblock->max_qp; - } - - if (shca->max_num_cqs == -1) - shca->max_num_cqs = min_t(int, rblock->max_cq, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { - ehca_gen_warn("The requested number of CQs is out of range " - "(1 - %i) specified by HW. Value is set to %i", - rblock->max_cq, rblock->max_cq); - } - - /* query max MTU from first port -- it's the same for all ports */ - port = (struct hipz_query_port *)rblock; - h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query port properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - shca->max_mtu = port->max_mtu; - -sense_attributes1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int init_node_guid(struct ehca_shca *shca) -{ - int ret = 0; - struct hipz_query_hca *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto init_node_guid1; - } - - memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); - -init_node_guid1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int ehca_init_device(struct ehca_shca *shca) -{ - int ret; - - ret = init_node_guid(shca); - if (ret) - return ret; - - strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); - shca->ib_device.owner = THIS_MODULE; - - shca->ib_device.uverbs_abi_ver = 8; - shca->ib_device.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - - shca->ib_device.node_type = RDMA_NODE_IB_CA; - shca->ib_device.phys_port_cnt = shca->num_ports; - shca->ib_device.num_comp_vectors = 1; - shca->ib_device.dma_device = &shca->ofdev->dev; - shca->ib_device.query_device = ehca_query_device; - shca->ib_device.query_port = ehca_query_port; - shca->ib_device.query_gid = ehca_query_gid; - shca->ib_device.query_pkey = ehca_query_pkey; - /* shca->in_device.modify_device = ehca_modify_device */ - shca->ib_device.modify_port = ehca_modify_port; - shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; - shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; - shca->ib_device.alloc_pd = ehca_alloc_pd; - shca->ib_device.dealloc_pd = ehca_dealloc_pd; - shca->ib_device.create_ah = ehca_create_ah; - /* shca->ib_device.modify_ah = ehca_modify_ah; */ - shca->ib_device.query_ah = ehca_query_ah; - shca->ib_device.destroy_ah = ehca_destroy_ah; - shca->ib_device.create_qp = ehca_create_qp; - shca->ib_device.modify_qp = ehca_modify_qp; - shca->ib_device.query_qp = ehca_query_qp; - shca->ib_device.destroy_qp = ehca_destroy_qp; - shca->ib_device.post_send = ehca_post_send; - shca->ib_device.post_recv = ehca_post_recv; - shca->ib_device.create_cq = ehca_create_cq; - shca->ib_device.destroy_cq = ehca_destroy_cq; - shca->ib_device.resize_cq = ehca_resize_cq; - shca->ib_device.poll_cq = ehca_poll_cq; - /* shca->ib_device.peek_cq = ehca_peek_cq; */ - shca->ib_device.req_notify_cq = ehca_req_notify_cq; - /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ - shca->ib_device.get_dma_mr = ehca_get_dma_mr; - shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; - shca->ib_device.reg_user_mr = ehca_reg_user_mr; - shca->ib_device.query_mr = ehca_query_mr; - shca->ib_device.dereg_mr = ehca_dereg_mr; - shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; - shca->ib_device.alloc_mw = ehca_alloc_mw; - shca->ib_device.bind_mw = ehca_bind_mw; - shca->ib_device.dealloc_mw = ehca_dealloc_mw; - shca->ib_device.alloc_fmr = ehca_alloc_fmr; - shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; - shca->ib_device.unmap_fmr = ehca_unmap_fmr; - shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; - shca->ib_device.attach_mcast = ehca_attach_mcast; - shca->ib_device.detach_mcast = ehca_detach_mcast; - shca->ib_device.process_mad = ehca_process_mad; - shca->ib_device.mmap = ehca_mmap; - shca->ib_device.dma_ops = &ehca_dma_mapping_ops; - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - shca->ib_device.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - - shca->ib_device.create_srq = ehca_create_srq; - shca->ib_device.modify_srq = ehca_modify_srq; - shca->ib_device.query_srq = ehca_query_srq; - shca->ib_device.destroy_srq = ehca_destroy_srq; - shca->ib_device.post_srq_recv = ehca_post_srq_recv; - } - - return ret; -} - -static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) -{ - struct ehca_sport *sport = &shca->sport[port - 1]; - struct ib_cq *ibcq; - struct ib_qp *ibqp; - struct ib_qp_init_attr qp_init_attr; - int ret; - - if (sport->ibcq_aqp1) { - ehca_err(&shca->ib_device, "AQP1 CQ is already created."); - return -EPERM; - } - - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); - if (IS_ERR(ibcq)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); - return PTR_ERR(ibcq); - } - sport->ibcq_aqp1 = ibcq; - - if (sport->ibqp_sqp[IB_QPT_GSI]) { - ehca_err(&shca->ib_device, "AQP1 QP is already created."); - ret = -EPERM; - goto create_aqp1; - } - - memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); - qp_init_attr.send_cq = ibcq; - qp_init_attr.recv_cq = ibcq; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.cap.max_send_wr = 100; - qp_init_attr.cap.max_recv_wr = 100; - qp_init_attr.cap.max_send_sge = 2; - qp_init_attr.cap.max_recv_sge = 1; - qp_init_attr.qp_type = IB_QPT_GSI; - qp_init_attr.port_num = port; - qp_init_attr.qp_context = NULL; - qp_init_attr.event_handler = NULL; - qp_init_attr.srq = NULL; - - ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); - if (IS_ERR(ibqp)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); - ret = PTR_ERR(ibqp); - goto create_aqp1; - } - sport->ibqp_sqp[IB_QPT_GSI] = ibqp; - - return 0; - -create_aqp1: - ib_destroy_cq(sport->ibcq_aqp1); - return ret; -} - -static int ehca_destroy_aqp1(struct ehca_sport *sport) -{ - int ret; - - ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); - if (ret) { - ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); - return ret; - } - - ret = ib_destroy_cq(sport->ibcq_aqp1); - if (ret) - ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret); - - return ret; -} - -static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); -} - -static ssize_t ehca_store_debug_level(struct device_driver *ddp, - const char *buf, size_t count) -{ - int value = (*buf) - '0'; - if (value >= 0 && value <= 9) - ehca_debug_level = value; - return 1; -} - -static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, - ehca_show_debug_level, ehca_store_debug_level); - -static struct attribute *ehca_drv_attrs[] = { - &driver_attr_debug_level.attr, - NULL -}; - -static struct attribute_group ehca_drv_attr_grp = { - .attrs = ehca_drv_attrs -}; - -static const struct attribute_group *ehca_drv_attr_groups[] = { - &ehca_drv_attr_grp, - NULL, -}; - -#define EHCA_RESOURCE_ATTR(name) \ -static ssize_t ehca_show_##name(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - struct ehca_shca *shca; \ - struct hipz_query_hca *rblock; \ - int data; \ - \ - shca = dev_get_drvdata(dev); \ - \ - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ - if (!rblock) { \ - dev_err(dev, "Can't allocate rblock memory.\n"); \ - return 0; \ - } \ - \ - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ - dev_err(dev, "Can't query device properties\n"); \ - ehca_free_fw_ctrlblock(rblock); \ - return 0; \ - } \ - \ - data = rblock->name; \ - ehca_free_fw_ctrlblock(rblock); \ - \ - if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ - return snprintf(buf, 256, "1\n"); \ - else \ - return snprintf(buf, 256, "%d\n", data); \ - \ -} \ -static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); - -EHCA_RESOURCE_ATTR(num_ports); -EHCA_RESOURCE_ATTR(hw_ver); -EHCA_RESOURCE_ATTR(max_eq); -EHCA_RESOURCE_ATTR(cur_eq); -EHCA_RESOURCE_ATTR(max_cq); -EHCA_RESOURCE_ATTR(cur_cq); -EHCA_RESOURCE_ATTR(max_qp); -EHCA_RESOURCE_ATTR(cur_qp); -EHCA_RESOURCE_ATTR(max_mr); -EHCA_RESOURCE_ATTR(cur_mr); -EHCA_RESOURCE_ATTR(max_mw); -EHCA_RESOURCE_ATTR(cur_mw); -EHCA_RESOURCE_ATTR(max_pd); -EHCA_RESOURCE_ATTR(max_ah); - -static ssize_t ehca_show_adapter_handle(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ehca_shca *shca = dev_get_drvdata(dev); - - return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); - -} -static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); - -static struct attribute *ehca_dev_attrs[] = { - &dev_attr_adapter_handle.attr, - &dev_attr_num_ports.attr, - &dev_attr_hw_ver.attr, - &dev_attr_max_eq.attr, - &dev_attr_cur_eq.attr, - &dev_attr_max_cq.attr, - &dev_attr_cur_cq.attr, - &dev_attr_max_qp.attr, - &dev_attr_cur_qp.attr, - &dev_attr_max_mr.attr, - &dev_attr_cur_mr.attr, - &dev_attr_max_mw.attr, - &dev_attr_cur_mw.attr, - &dev_attr_max_pd.attr, - &dev_attr_max_ah.attr, - NULL -}; - -static struct attribute_group ehca_dev_attr_grp = { - .attrs = ehca_dev_attrs -}; - -static int ehca_probe(struct platform_device *dev) -{ - struct ehca_shca *shca; - const u64 *handle; - struct ib_pd *ibpd; - int ret, i, eq_size; - unsigned long flags; - - handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); - if (!handle) { - ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - if (!(*handle)) { - ehca_gen_err("Wrong eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); - if (!shca) { - ehca_gen_err("Cannot allocate shca memory."); - return -ENOMEM; - } - - mutex_init(&shca->modify_mutex); - atomic_set(&shca->num_cqs, 0); - atomic_set(&shca->num_qps, 0); - shca->max_num_qps = ehca_max_qp; - shca->max_num_cqs = ehca_max_cq; - - for (i = 0; i < ARRAY_SIZE(shca->sport); i++) - spin_lock_init(&shca->sport[i].mod_sqp_lock); - - shca->ofdev = dev; - shca->ipz_hca_handle.handle = *handle; - dev_set_drvdata(&dev->dev, shca); - - ret = ehca_sense_attributes(shca); - if (ret < 0) { - ehca_gen_err("Cannot sense eHCA attributes."); - goto probe1; - } - - ret = ehca_init_device(shca); - if (ret) { - ehca_gen_err("Cannot init ehca device struct"); - goto probe1; - } - - eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; - /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create EQ."); - goto probe1; - } - - ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create NEQ."); - goto probe3; - } - - /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); - if (IS_ERR(ibpd)) { - ehca_err(&shca->ib_device, "Cannot create internal PD."); - ret = PTR_ERR(ibpd); - goto probe4; - } - - shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); - shca->pd->ib_pd.device = &shca->ib_device; - - /* create internal max MR */ - ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); - - if (ret) { - ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", - ret); - goto probe5; - } - - ret = ib_register_device(&shca->ib_device, NULL); - if (ret) { - ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%i", ret); - goto probe6; - } - - /* create AQP1 for port 1 */ - if (ehca_open_aqp1 == 1) { - shca->sport[0].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 1); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 1."); - goto probe7; - } - } - - /* create AQP1 for port 2 */ - if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { - shca->sport[1].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 2); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 2."); - goto probe8; - } - } - - ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_err(&shca->ib_device, - "Cannot create device attributes ret=%d", ret); - - spin_lock_irqsave(&shca_list_lock, flags); - list_add(&shca->shca_list, &shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return 0; - -probe8: - ret = ehca_destroy_aqp1(&shca->sport[0]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port 1. ret=%i", ret); - -probe7: - ib_unregister_device(&shca->ib_device); - -probe6: - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%x", ret); - -probe5: - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%x", ret); - -probe4: - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy NEQ. ret=%x", ret); - -probe3: - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy EQ. ret=%x", ret); - -probe1: - ib_dealloc_device(&shca->ib_device); - - return -EINVAL; -} - -static int ehca_remove(struct platform_device *dev) -{ - struct ehca_shca *shca = dev_get_drvdata(&dev->dev); - unsigned long flags; - int ret; - - sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); - - if (ehca_open_aqp1 == 1) { - int i; - for (i = 0; i < shca->num_ports; i++) { - ret = ehca_destroy_aqp1(&shca->sport[i]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port %x " - "ret=%i", ret, i); - } - } - - ib_unregister_device(&shca->ib_device); - - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%i", ret); - - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret); - - ib_dealloc_device(&shca->ib_device); - - spin_lock_irqsave(&shca_list_lock, flags); - list_del(&shca->shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return ret; -} - -static struct of_device_id ehca_device_table[] = -{ - { - .name = "lhca", - .compatible = "IBM,lhca", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, ehca_device_table); - -static struct platform_driver ehca_driver = { - .probe = ehca_probe, - .remove = ehca_remove, - .driver = { - .name = "ehca", - .owner = THIS_MODULE, - .groups = ehca_drv_attr_groups, - .of_match_table = ehca_device_table, - }, -}; - -void ehca_poll_eqs(unsigned long data) -{ - struct ehca_shca *shca; - - spin_lock(&shca_list_lock); - list_for_each_entry(shca, &shca_list, shca_list) { - if (shca->eq.is_initialized) { - /* call deadman proc only if eq ptr does not change */ - struct ehca_eq *eq = &shca->eq; - int max = 3; - volatile u64 q_ofs, q_ofs2; - unsigned long flags; - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - do { - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs2 = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - max--; - } while (q_ofs == q_ofs2 && max > 0); - if (q_ofs == q_ofs2) - ehca_process_eq(shca, 0); - } - } - mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); - spin_unlock(&shca_list_lock); -} - -static int ehca_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - static unsigned long ehca_dmem_warn_time; - unsigned long flags; - - switch (action) { - case MEM_CANCEL_OFFLINE: - case MEM_CANCEL_ONLINE: - case MEM_ONLINE: - case MEM_OFFLINE: - return NOTIFY_OK; - case MEM_GOING_ONLINE: - case MEM_GOING_OFFLINE: - /* only ok if no hca is attached to the lpar */ - spin_lock_irqsave(&shca_list_lock, flags); - if (list_empty(&shca_list)) { - spin_unlock_irqrestore(&shca_list_lock, flags); - return NOTIFY_OK; - } else { - spin_unlock_irqrestore(&shca_list_lock, flags); - if (printk_timed_ratelimit(&ehca_dmem_warn_time, - 30 * 1000)) - ehca_gen_err("DMEM operations are not allowed" - "in conjunction with eHCA"); - return NOTIFY_BAD; - } - } - return NOTIFY_OK; -} - -static struct notifier_block ehca_mem_nb = { - .notifier_call = ehca_mem_notifier, -}; - -static int __init ehca_module_init(void) -{ - int ret; - - printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Version " HCAD_VERSION ")\n"); - - ret = ehca_create_comp_pool(); - if (ret) { - ehca_gen_err("Cannot create comp pool."); - return ret; - } - - ret = ehca_create_slab_caches(); - if (ret) { - ehca_gen_err("Cannot create SLAB caches"); - ret = -ENOMEM; - goto module_init1; - } - - ret = ehca_create_busmap(); - if (ret) { - ehca_gen_err("Cannot create busmap."); - goto module_init2; - } - - ret = ibmebus_register_driver(&ehca_driver); - if (ret) { - ehca_gen_err("Cannot register eHCA device driver"); - ret = -EINVAL; - goto module_init3; - } - - ret = register_memory_notifier(&ehca_mem_nb); - if (ret) { - ehca_gen_err("Failed registering memory add/remove notifier"); - goto module_init4; - } - - if (ehca_poll_all_eqs != 1) { - ehca_gen_err("WARNING!!!"); - ehca_gen_err("It is possible to lose interrupts."); - } else { - init_timer(&poll_eqs_timer); - poll_eqs_timer.function = ehca_poll_eqs; - poll_eqs_timer.expires = jiffies + HZ; - add_timer(&poll_eqs_timer); - } - - return 0; - -module_init4: - ibmebus_unregister_driver(&ehca_driver); - -module_init3: - ehca_destroy_busmap(); - -module_init2: - ehca_destroy_slab_caches(); - -module_init1: - ehca_destroy_comp_pool(); - return ret; -}; - -static void __exit ehca_module_exit(void) -{ - if (ehca_poll_all_eqs == 1) - del_timer_sync(&poll_eqs_timer); - - ibmebus_unregister_driver(&ehca_driver); - - unregister_memory_notifier(&ehca_mem_nb); - - ehca_destroy_busmap(); - - ehca_destroy_slab_caches(); - - ehca_destroy_comp_pool(); - - idr_destroy(&ehca_cq_idr); - idr_destroy(&ehca_qp_idr); -}; - -module_init(ehca_module_init); -module_exit(ehca_module_exit); diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_mcast.c b/kernel/drivers/infiniband/hw/ehca/ehca_mcast.c deleted file mode 100644 index cec181532..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_mcast.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * mcast functions - * - * Authors: Khadija Souissi <souissik@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/module.h> -#include <linux/err.h> -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define MAX_MC_LID 0xFFFE -#define MIN_MC_LID 0xC000 /* Multicast limits */ -#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) -#define EHCA_VALID_MULTICAST_LID(lid) \ - (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) - -int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} - -int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->pd->device, - struct ehca_shca, ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_mrmw.c b/kernel/drivers/infiniband/hw/ehca/ehca_mrmw.c deleted file mode 100644 index f914b3099..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ /dev/null @@ -1,2593 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW functions - * - * Authors: Dietmar Decker <ddecker@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> -#include <rdma/ib_umem.h> - -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "hcp_if.h" -#include "hipz_hw.h" - -#define NUM_CHUNKS(length, chunk_size) \ - (((length) + (chunk_size - 1)) / (chunk_size)) - -/* max number of rpages (per hcall register_rpages) */ -#define MAX_RPAGES 512 - -/* DMEM toleration management */ -#define EHCA_SECTSHIFT SECTION_SIZE_BITS -#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) -#define EHCA_HUGEPAGESHIFT 34 -#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) -#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) -#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL -#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ -#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) -#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) -#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ -#define EHCA_DIR_MAP_SIZE (0x10000) -#define EHCA_ENT_MAP_SIZE (0x10000) -#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) - -static unsigned long ehca_mr_len; - -/* - * Memory map data structures - */ -struct ehca_dir_bmap { - u64 ent[EHCA_MAP_ENTRIES]; -}; -struct ehca_top_bmap { - struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; -}; -struct ehca_bmap { - struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; -}; - -static struct ehca_bmap *ehca_bmap; - -static struct kmem_cache *mr_cache; -static struct kmem_cache *mw_cache; - -enum ehca_mr_pgsize { - EHCA_MR_PGSIZE4K = 0x1000L, - EHCA_MR_PGSIZE64K = 0x10000L, - EHCA_MR_PGSIZE1M = 0x100000L, - EHCA_MR_PGSIZE16M = 0x1000000L -}; - -#define EHCA_MR_PGSHIFT4K 12 -#define EHCA_MR_PGSHIFT64K 16 -#define EHCA_MR_PGSHIFT1M 20 -#define EHCA_MR_PGSHIFT16M 24 - -static u64 ehca_map_vaddr(void *caddr); - -static u32 ehca_encode_hwpage_size(u32 pgsize) -{ - int log = ilog2(pgsize); - WARN_ON(log < 12 || log > 24 || log & 3); - return (log - 12) / 4; -} - -static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) -{ - return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); -} - -static struct ehca_mr *ehca_mr_new(void) -{ - struct ehca_mr *me; - - me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mrlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mr_delete(struct ehca_mr *me) -{ - kmem_cache_free(mr_cache, me); -} - -static struct ehca_mw *ehca_mw_new(void) -{ - struct ehca_mw *me; - - me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mwlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mw_delete(struct ehca_mw *me) -{ - kmem_cache_free(mw_cache, me); -} - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_maxmr; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - - if (shca->maxmr) { - e_maxmr = ehca_mr_new(); - if (!e_maxmr) { - ehca_err(&shca->ib_device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto get_dma_mr_exit0; - } - - ret = ehca_reg_maxmr(shca, e_maxmr, - (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), - mr_access_flags, e_pd, - &e_maxmr->ib.ib_mr.lkey, - &e_maxmr->ib.ib_mr.rkey); - if (ret) { - ehca_mr_delete(e_maxmr); - ib_mr = ERR_PTR(ret); - goto get_dma_mr_exit0; - } - ib_mr = &e_maxmr->ib.ib_mr; - } else { - ehca_err(&shca->ib_device, "no internal max-MR exist!"); - ib_mr = ERR_PTR(-EINVAL); - goto get_dma_mr_exit0; - } - -get_dma_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", - PTR_ERR(ib_mr), pd, mr_access_flags); - return ib_mr; -} /* end ehca_get_dma_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - - u64 size; - - if ((num_phys_buf <= 0) || !phys_buf_array) { - ehca_err(pd->device, "bad input values: num_phys_buf=%x " - "phys_buf_array=%p", num_phys_buf, phys_buf_array); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, - iova_start, &size); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit0; - } - if ((size == 0) || - (((u64)iova_start + size) < (u64)iova_start)) { - ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", - size, iova_start); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_phys_mr_exit0; - } - - /* register MR on HCA */ - if (ehca_mr_is_maxmr(size, iova_start)) { - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, - e_pd, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } else { - struct ehca_mr_pginfo pginfo; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, - PAGE_SIZE); - /* for kernel space we try most possible pgsize */ - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, - hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_phys_mr_exit1: - ehca_mr_delete(e_mr); -reg_phys_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " - "num_phys_buf=%x mr_access_flags=%x iova_start=%p", - PTR_ERR(ib_mr), pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ib_mr; -} /* end ehca_reg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata) -{ - struct ib_mr *ib_mr; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - int ret, page_shift; - u32 num_kpages; - u32 num_hwpages; - u64 hwpage_size; - - if (!pd) { - ehca_gen_err("bad pd=%p", pd); - return ERR_PTR(-EFAULT); - } - - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - if (length == 0 || virt + length < virt) { - ehca_err(pd->device, "bad input values: length=%llx " - "virt_base=%llx", length, virt); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_user_mr_exit0; - } - - e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(e_mr->umem)) { - ib_mr = (void *)e_mr->umem; - goto reg_user_mr_exit1; - } - - if (e_mr->umem->page_size != PAGE_SIZE) { - ehca_err(pd->device, "page size not supported, " - "e_mr->umem->page_size=%x", e_mr->umem->page_size); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit2; - } - - /* determine number of MR pages */ - num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); - /* select proper hw_pgsize */ - page_shift = PAGE_SHIFT; - if (e_mr->umem->hugetlb) { - /* determine page_shift, clamp between 4K and 16M */ - page_shift = (fls64(length - 1) + 3) & ~3; - page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), - EHCA_MR_PGSHIFT16M); - } - hwpage_size = 1UL << page_shift; - - /* now that we have the desired page size, shift until it's - * supported, too. 4K is always supported, so this terminates. - */ - while (!(hwpage_size & shca->hca_cap_mr_pgsize)) - hwpage_size >>= 4; - -reg_user_mr_fallback: - num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_USER; - pginfo.hwpage_size = hwpage_size; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.u.usr.region = e_mr->umem; - pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; - pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; - ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { - ehca_warn(pd->device, "failed to register mr " - "with hwpage_size=%llx", hwpage_size); - ehca_info(pd->device, "try to register mr with " - "kpage_size=%lx", PAGE_SIZE); - /* - * this means kpages are not contiguous for a hw page - * try kernel page size as fallback solution - */ - hwpage_size = PAGE_SIZE; - goto reg_user_mr_fallback; - } - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_user_mr_exit2; - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_user_mr_exit2: - ib_umem_release(e_mr->umem); -reg_user_mr_exit1: - ehca_mr_delete(e_mr); -reg_user_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", - PTR_ERR(ib_mr), pd, mr_access_flags, udata); - return ib_mr; -} /* end ehca_reg_user_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - int ret; - - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - u64 new_size; - u64 *new_start; - u32 new_acl; - struct ehca_pd *new_pd; - u32 tmp_lkey, tmp_rkey; - unsigned long sl_flags; - u32 num_kpages = 0; - u32 num_hwpages = 0; - struct ehca_mr_pginfo pginfo; - - if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { - /* TODO not supported, because PHYP rereg hCall needs pages */ - ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " - "supported yet, mr_rereg_mask=%x", mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - if (mr_rereg_mask & IB_MR_REREG_PD) { - if (!pd) { - ehca_err(mr->device, "rereg with bad pd, pd=%p " - "mr_rereg_mask=%x", pd, mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - - if ((mr_rereg_mask & - ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || - (mr_rereg_mask == 0)) { - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* check other parameters */ - if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */ - if (e_mr->flags & EHCA_MR_FLAG_FMR) { - ehca_err(mr->device, "not supported for FMR, mr=%p " - "flags=%x", mr, e_mr->flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values mr_rereg_mask=%x" - " phys_buf_array=%p num_phys_buf=%x", - mr_rereg_mask, phys_buf_array, num_phys_buf); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ - (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " - "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* set requested values dependent on rereg request */ - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; - new_size = e_mr->size; - new_acl = e_mr->acl; - new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); - - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - u64 hw_pgsize = ehca_get_max_hwpage_size(shca); - - new_start = iova_start; /* change address */ - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, - num_phys_buf, iova_start, - &new_size); - if (ret) - goto rereg_phys_mr_exit1; - if ((new_size == 0) || - (((u64)iova_start + new_size) < (u64)iova_start)) { - ehca_err(mr->device, "bad input values: new_size=%llx " - "iova_start=%p", new_size, iova_start); - ret = -EINVAL; - goto rereg_phys_mr_exit1; - } - num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + - new_size, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + - new_size, hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - } - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - new_acl = mr_access_flags; - if (mr_rereg_mask & IB_MR_REREG_PD) - new_pd = container_of(pd, struct ehca_pd, ib_pd); - - ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, - new_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto rereg_phys_mr_exit1; - - /* successful reregistration */ - if (mr_rereg_mask & IB_MR_REREG_PD) - mr->pd = pd; - mr->lkey = tmp_lkey; - mr->rkey = tmp_rkey; - -rereg_phys_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -rereg_phys_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " - "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " - "iova_start=%p", - ret, mr, mr_rereg_mask, pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ret; -} /* end ehca_rereg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout; - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto query_mr_exit0; - } - - memset(mr_attr, 0, sizeof(struct ib_mr_attr)); - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - - h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " - "hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto query_mr_exit1; - } - mr_attr->pd = mr->pd; - mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; - ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); - -query_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -query_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", - ret, mr, mr_attr); - return ret; -} /* end ehca_query_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_mr(struct ib_mr *mr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto dereg_mr_exit0; - } else if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto dereg_mr_exit0; - } - - /* TODO: BUSY: MR still has bound window(s) */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " - "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", - h_ret, shca, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto dereg_mr_exit0; - } - - if (e_mr->umem) - ib_umem_release(e_mr->umem); - - /* successful deregistration */ - ehca_mr_delete(e_mr); - -dereg_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p", ret, mr); - return ret; -} /* end ehca_dereg_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) -{ - struct ib_mw *ib_mw; - u64 h_ret; - struct ehca_mw *e_mw; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - e_mw = ehca_mw_new(); - if (!e_mw) { - ib_mw = ERR_PTR(-ENOMEM); - goto alloc_mw_exit0; - } - - h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " - "shca=%p hca_hndl=%llx mw=%p", - h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); - goto alloc_mw_exit1; - } - /* successful MW allocation */ - e_mw->ipz_mw_handle = hipzout.handle; - e_mw->ib_mw.rkey = hipzout.rkey; - return &e_mw->ib_mw; - -alloc_mw_exit1: - ehca_mw_delete(e_mw); -alloc_mw_exit0: - if (IS_ERR(ib_mw)) - ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); - return ib_mw; -} /* end ehca_alloc_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* TODO: not supported up to now */ - ehca_gen_err("bind MW currently not supported by HCAD"); - - return -EPERM; -} /* end ehca_bind_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_mw(struct ib_mw *mw) -{ - u64 h_ret; - struct ehca_shca *shca = - container_of(mw->device, struct ehca_shca, ib_device); - struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); - - h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); - if (h_ret != H_SUCCESS) { - ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " - "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx", - h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, - e_mw->ipz_mw_handle.handle); - return ehca2ib_return_code(h_ret); - } - /* successful deallocation */ - ehca_mw_delete(e_mw); - return 0; -} /* end ehca_dealloc_mw() */ - -/*----------------------------------------------------------------------*/ - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr *e_fmr; - int ret; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - u64 hw_pgsize; - - /* check other parameters */ - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if (mr_access_flags & IB_ACCESS_MW_BIND) { - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { - ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " - "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", - fmr_attr->max_pages, fmr_attr->max_maps, - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - hw_pgsize = 1 << fmr_attr->page_shift; - if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { - ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - e_fmr = ehca_mr_new(); - if (!e_fmr) { - ib_fmr = ERR_PTR(-ENOMEM); - goto alloc_fmr_exit0; - } - e_fmr->flags |= EHCA_MR_FLAG_FMR; - - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.hwpage_size = hw_pgsize; - /* - * pginfo.num_hwpages==0, ie register_rpages() will not be called - * but deferred to map_phys_fmr() - */ - ret = ehca_reg_mr(shca, e_fmr, NULL, - fmr_attr->max_pages * (1 << fmr_attr->page_shift), - mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey, EHCA_REG_MR); - if (ret) { - ib_fmr = ERR_PTR(ret); - goto alloc_fmr_exit1; - } - - /* successful */ - e_fmr->hwpage_size = hw_pgsize; - e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; - e_fmr->fmr_max_pages = fmr_attr->max_pages; - e_fmr->fmr_max_maps = fmr_attr->max_maps; - e_fmr->fmr_map_cnt = 0; - return &e_fmr->ib.ib_fmr; - -alloc_fmr_exit1: - ehca_mr_delete(e_fmr); -alloc_fmr_exit0: - return ib_fmr; -} /* end ehca_alloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, - int list_len, - u64 iova) -{ - int ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - u32 tmp_lkey, tmp_rkey; - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); - if (ret) - goto map_phys_fmr_exit0; - if (iova % e_fmr->fmr_page_size) { - /* only whole-numbered pages */ - ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", - iova, e_fmr->fmr_page_size); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { - /* HCAD does not limit the maps, however trace this anyway */ - ehca_info(fmr->device, "map limit exceeded, fmr=%p " - "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", - fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); - } - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_kpages = list_len; - pginfo.hwpage_size = e_fmr->hwpage_size; - pginfo.num_hwpages = - list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; - pginfo.u.fmr.page_list = page_list; - pginfo.next_hwpage = - (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; - pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - - ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, - list_len * e_fmr->fmr_page_size, - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto map_phys_fmr_exit0; - - /* successful reregistration */ - e_fmr->fmr_map_cnt++; - e_fmr->ib.ib_fmr.lkey = tmp_lkey; - e_fmr->ib.ib_fmr.rkey = tmp_rkey; - return 0; - -map_phys_fmr_exit0: - if (ret) - ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " - "iova=%llx", ret, fmr, page_list, list_len, iova); - return ret; -} /* end ehca_map_phys_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_fmr(struct list_head *fmr_list) -{ - int ret = 0; - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = NULL; - struct ehca_shca *prev_shca; - struct ehca_mr *e_fmr; - u32 num_fmr = 0; - u32 unmap_fmr_cnt = 0; - - /* check all FMR belong to same SHCA, and check internal flag */ - list_for_each_entry(ib_fmr, fmr_list, list) { - prev_shca = shca; - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - if ((shca != prev_shca) && prev_shca) { - ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " - "prev_shca=%p e_fmr=%p", - shca, prev_shca, e_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " - "e_fmr->flags=%x", e_fmr, e_fmr->flags); - ret = -EINVAL; - goto unmap_fmr_exit0; - } - num_fmr++; - } - - /* loop over all FMRs to unmap */ - list_for_each_entry(ib_fmr, fmr_list, list) { - unmap_fmr_cnt++; - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - ret = ehca_unmap_one_fmr(shca, e_fmr); - if (ret) { - /* unmap failed, stop unmapping of rest of FMRs */ - ehca_err(&shca->ib_device, "unmap of one FMR failed, " - "stop rest, e_fmr=%p num_fmr=%x " - "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, - unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); - goto unmap_fmr_exit0; - } - } - -unmap_fmr_exit0: - if (ret) - ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", - ret, fmr_list, num_fmr, unmap_fmr_cnt); - return ret; -} /* end ehca_unmap_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_fmr(struct ib_fmr *fmr) -{ - int ret; - u64 h_ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto free_fmr_exit0; - } - - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " - "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca2ib_return_code(h_ret); - goto free_fmr_exit0; - } - /* successful deregistration */ - ehca_mr_delete(e_fmr); - return 0; - -free_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); - return ret; -} /* end ehca_dealloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo); - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey, /*OUT*/ - enum ehca_reg_type reg_type) -{ - int ret; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; - - h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " - "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_mr_exit0; - } - - e_mr->ipz_mr_handle = hipzout.handle; - - if (reg_type == EHCA_REG_BUSMAP_MR) - ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); - else if (reg_type == EHCA_REG_MR) - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); - else - ret = -EINVAL; - - if (ret) - goto ehca_reg_mr_exit1; - - /* successful registration */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_mr_exit1: - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", - h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages, ret); - ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " - "not recoverable"); - } -ehca_reg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", - ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int ret = 0; - u64 h_ret; - u32 rnum; - u64 rpage; - u32 i; - u64 *kpage; - - if (!pginfo->num_hwpages) /* in case of fmr */ - return 0; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_reg_mr_rpages_exit0; - } - - /* max MAX_RPAGES ehca mr pages per register call */ - for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ - if (rnum == 0) - rnum = MAX_RPAGES; /* last shot is full */ - } else - rnum = MAX_RPAGES; - - ret = ehca_set_pagebuf(pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%i rnum=%x kpage=%p", - ret, rnum, kpage); - goto ehca_reg_mr_rpages_exit1; - } - - if (rnum > 1) { - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p i=%x", - kpage, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } else - rpage = *kpage; - - h_ret = hipz_h_register_rpage_mr( - shca->ipz_hca_handle, e_mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - /* - * check for 'registration complete'==H_SUCCESS - * and for 'page registered'==H_PAGE_REGISTERED - */ - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "last " - "hipz_reg_rpage_mr failed, h_ret=%lli " - "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" - " lkey=%x", h_ret, e_mr, i, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } else if (h_ret != H_PAGE_REGISTERED) { - ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " - "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " - "mr_hndl=%llx", h_ret, e_mr, i, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } /* end for(i) */ - - -ehca_reg_mr_rpages_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_reg_mr_rpages_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, - pginfo, pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr_rpages() */ - -/*----------------------------------------------------------------------*/ - -inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - u32 acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret; - u64 h_ret; - u32 hipz_acl; - u64 *kpage; - u64 rpage; - struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_rereg_mr_rereg1_exit0; - } - - pginfo_save = *pginfo; - ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); - if (ret) { - ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " - "kpage=%p", e_mr, pginfo, pginfo->type, - pginfo->num_kpages, pginfo->num_hwpages, kpage); - goto ehca_rereg_mr_rereg1_exit1; - } - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p", kpage); - ret = -EFAULT; - goto ehca_rereg_mr_rereg1_exit1; - } - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, rpage, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * reregistration unsuccessful, try it again with the 3 hCalls, - * e.g. this is required in case H_MR_CONDITION - * (MW bound or MR is shared) - */ - ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); - *pginfo = pginfo_save; - ret = -EAGAIN; - } else if ((u64 *)hipzout.vaddr != iova_start) { - ehca_err(&shca->ib_device, "PHYP changed iova_start in " - "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " - "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, - hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey, hipzout.lkey); - ret = -EFAULT; - } else { - /* - * successful reregistration - * note: start and start_out are identical for eServer HCAs - */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - } - -ehca_rereg_mr_rereg1_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_rereg_mr_rereg1_exit0: - if ( ret && (ret != -EAGAIN) ) - ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx", - ret, *lkey, *rkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages); - return ret; -} /* end ehca_rereg_mr_rereg1() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey) -{ - int ret = 0; - u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ - - /* first determine reregistration hCall(s) */ - if ((pginfo->num_hwpages > MAX_RPAGES) || - (e_mr->num_hwpages > MAX_RPAGES) || - (pginfo->num_hwpages > e_mr->num_hwpages)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, " - "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", - pginfo->num_hwpages, e_mr->num_hwpages); - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ - rereg_1_hcall = 0; - rereg_3_hcall = 1; - e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; - ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", - e_mr); - } - - if (rereg_1_hcall) { - ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, - acl, e_pd, pginfo, lkey, rkey); - if (ret) { - if (ret == -EAGAIN) - rereg_3_hcall = 1; - else - goto ehca_rereg_mr_exit0; - } - } - - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first deregister old MR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " - "mr->lkey=%x", - h_ret, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_rereg_mr_exit0; - } - /* clean ehca_mr_t, without changing struct ib_mr and lock */ - save_mr = *e_mr; - ehca_mr_deletenew(e_mr); - - /* set some MR values */ - e_mr->flags = save_mr.flags; - e_mr->hwpage_size = save_mr.hwpage_size; - e_mr->fmr_page_size = save_mr.fmr_page_size; - e_mr->fmr_max_pages = save_mr.fmr_max_pages; - e_mr->fmr_max_maps = save_mr.fmr_max_maps; - e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; - memcpy(&e_mr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_rereg_mr_exit0; - } - } - -ehca_rereg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " - "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, - rereg_1_hcall, rereg_3_hcall); - return ret; -} /* end ehca_rereg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr) -{ - int ret = 0; - u64 h_ret; - struct ehca_pd *e_pd = - container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); - struct ehca_mr save_fmr; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - struct ehca_mr_hipzout_parms hipzout; - struct ehca_mr save_mr; - - if (e_fmr->fmr_max_pages <= MAX_RPAGES) { - /* - * note: after using rereg hcall with len=0, - * rereg hcall must be used again for registering pages - */ - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, - 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret == H_SUCCESS) { - /* successful reregistration */ - e_fmr->start = NULL; - e_fmr->size = 0; - tmp_lkey = hipzout.lkey; - tmp_rkey = hipzout.rkey; - return 0; - } - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " - "mr_hndl=%llx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - /* try free and rereg */ - } - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->hwpage_size = save_fmr.hwpage_size; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - } - -ehca_unmap_one_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); - return ret; -} /* end ehca_unmap_one_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret = 0; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " - "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, - shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - e_origmr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_smr_exit0; - } - /* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_smr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " - "e_newmr=%p iova_start=%p acl=%x e_pd=%p", - ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); - return ret; -} /* end ehca_reg_smr() */ - -/*----------------------------------------------------------------------*/ -static inline void *ehca_calc_sectbase(int top, int dir, int idx) -{ - unsigned long ret = idx; - ret |= dir << EHCA_DIR_INDEX_SHIFT; - ret |= top << EHCA_TOP_INDEX_SHIFT; - return __va(ret << SECTION_SIZE_BITS); -} - -#define ehca_bmap_valid(entry) \ - ((u64)entry != (u64)EHCA_INVAL_ADDR) - -static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 h_ret = 0; - unsigned long page = 0; - u64 rpage = __pa(kpage); - int page_count; - - void *sectbase = ehca_calc_sectbase(top, dir, idx); - if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { - ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" - "hwpage_size does not fit to " - "section start address"); - } - page_count = EHCA_SECTSIZE / pginfo->hwpage_size; - - while (page < page_count) { - u64 rnum; - for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); - rnum++) { - void *pg = sectbase + ((page++) * pginfo->hwpage_size); - kpage[rnum] = __pa(pg); - } - - h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { - ehca_err(&shca->ib_device, "register_rpage_mr failed"); - return h_ret; - } - } - return h_ret; -} - -static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int idx; - - for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) - continue; - - hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, - pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, - struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int dir; - - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -/* register internal max-MR to internal SHCA */ -int ehca_reg_internal_maxmr( - struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **e_maxmr) /*OUT*/ -{ - int ret; - struct ehca_mr *e_mr; - u64 *iova_start; - u64 size_maxmr; - struct ehca_mr_pginfo pginfo; - struct ib_phys_buf ib_pbuf; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - if (!ehca_bmap) { - ret = -EFAULT; - goto ehca_reg_internal_maxmr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(&shca->ib_device, "out of memory"); - ret = -ENOMEM; - goto ehca_reg_internal_maxmr_exit0; - } - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - - /* register internal max-MR on HCA */ - size_maxmr = ehca_mr_len; - iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); - ib_pbuf.addr = 0; - ib_pbuf.size = size_maxmr; - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, - PAGE_SIZE); - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, - hw_pgsize); - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.u.phy.num_phys_buf = 1; - pginfo.u.phy.phys_buf_array = &ib_pbuf; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); - if (ret) { - ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " - "num_hwpages=%x", e_mr, iova_start, size_maxmr, - num_kpages, num_hwpages); - goto ehca_reg_internal_maxmr_exit1; - } - - /* successful registration of all pages */ - e_mr->ib.ib_mr.device = e_pd->ib_pd.device; - e_mr->ib.ib_mr.pd = &e_pd->ib_pd; - e_mr->ib.ib_mr.uobject = NULL; - atomic_inc(&(e_pd->ib_pd.usecnt)); - atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); - *e_maxmr = e_mr; - return 0; - -ehca_reg_internal_maxmr_exit1: - ehca_mr_delete(e_mr); -ehca_reg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", - ret, shca, e_pd, e_maxmr); - return ret; -} /* end ehca_reg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey) -{ - u64 h_ret; - struct ehca_mr *e_origmr = shca->maxmr; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, e_origmr, shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - e_origmr->ib.ib_mr.lkey); - return ehca2ib_return_code(h_ret); - } - /* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; -} /* end ehca_reg_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca) -{ - int ret; - struct ehca_mr *e_maxmr; - struct ib_pd *ib_pd; - - if (!shca->maxmr) { - ehca_err(&shca->ib_device, "bad call, shca=%p", shca); - ret = -EINVAL; - goto ehca_dereg_internal_maxmr_exit0; - } - - e_maxmr = shca->maxmr; - ib_pd = e_maxmr->ib.ib_mr.pd; - shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ - - ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); - if (ret) { - ehca_err(&shca->ib_device, "dereg internal max-MR failed, " - "ret=%i e_maxmr=%p shca=%p lkey=%x", - ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); - shca->maxmr = e_maxmr; - goto ehca_dereg_internal_maxmr_exit0; - } - - atomic_dec(&ib_pd->usecnt); - -ehca_dereg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", - ret, shca, shca->maxmr); - return ret; -} /* end ehca_dereg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* - * check physical buffer array of MR verbs for validness and - * calculates MR size - */ -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size) -{ - struct ib_phys_buf *pbuf = phys_buf_array; - u64 size_count = 0; - u32 i; - - if (num_phys_buf == 0) { - ehca_gen_err("bad phys buf array len, num_phys_buf=0"); - return -EINVAL; - } - /* check first buffer */ - if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { - ehca_gen_err("iova_start/addr mismatch, iova_start=%p " - "pbuf->addr=%llx pbuf->size=%llx", - iova_start, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && - (num_phys_buf > 1)) { - ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " - "pbuf->size=%llx", pbuf->addr, pbuf->size); - return -EINVAL; - } - - for (i = 0; i < num_phys_buf; i++) { - if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { - ehca_gen_err("bad address, i=%x pbuf->addr=%llx " - "pbuf->size=%llx", - i, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((i > 0) && /* not 1st */ - (i < (num_phys_buf - 1)) && /* not last */ - (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { - ehca_gen_err("bad size, i=%x pbuf->size=%llx", - i, pbuf->size); - return -EINVAL; - } - size_count += pbuf->size; - pbuf++; - } - - *size = size_count; - return 0; -} /* end ehca_mr_chk_buf_and_calc_size() */ - -/*----------------------------------------------------------------------*/ - -/* check page list of map FMR verb for validness */ -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len) -{ - u32 i; - u64 *page; - - if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { - ehca_gen_err("bad list_len, list_len=%x " - "e_fmr->fmr_max_pages=%x fmr=%p", - list_len, e_fmr->fmr_max_pages, e_fmr); - return -EINVAL; - } - - /* each page must be aligned */ - page = page_list; - for (i = 0; i < list_len; i++) { - if (*page % e_fmr->fmr_page_size) { - ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " - "fmr_page_size=%x", i, *page, page, e_fmr, - e_fmr->fmr_page_size); - return -EINVAL; - } - page++; - } - - return 0; -} /* end ehca_fmr_check_page_list() */ - -/*----------------------------------------------------------------------*/ - -/* PAGE_SIZE >= pginfo->hwpage_size */ -static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr; - u32 j = 0; - int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - pgaddr = page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx " - "sg_dma_address=%llx " - "entry=%llx next_hwpage=%llx", - pgaddr, (u64)sg_dma_address(*sg), - pginfo->u.usr.next_nmap, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - *sg = sg_next(*sg); - } - j++; - if (j >= number) - break; - } - - return ret; -} - -/* - * check given pages for contiguous layout - * last page addr is returned in prev_pgaddr for further check - */ -static int ehca_check_kpages_per_ate(struct scatterlist **sg, - int num_pages, - u64 *prev_pgaddr) -{ - for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { - u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; - if (ehca_debug_level >= 3) - ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, - *(u64 *)__va(pgaddr)); - if (pgaddr - PAGE_SIZE != *prev_pgaddr) { - ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx entries_left_in_hwpage=%x", - pgaddr, *prev_pgaddr, num_pages); - return -EINVAL; - } - *prev_pgaddr = pgaddr; - } - return 0; -} - -/* PAGE_SIZE < pginfo->hwpage_size */ -static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr, prev_pgaddr; - u32 j = 0; - int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; - int nr_kpages = kpages_per_hwpage; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - - if (nr_kpages == kpages_per_hwpage) { - pgaddr = (page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT); - *kpage = pgaddr; - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx entry=%llx", - pgaddr, pginfo->u.usr.next_nmap); - ret = -EFAULT; - return ret; - } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx entry=%llx " - "mr_pgsize=%llx", - pgaddr, pginfo->u.usr.next_nmap, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - *sg = sg_next(*sg); - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; - } - - ret = ehca_check_kpages_per_ate(sg, nr_kpages, - &prev_pgaddr); - if (ret) - return ret; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; - -next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) - break; - } - - return ret; -} - -static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - struct ib_phys_buf *pbuf; - u64 num_hw, offs_hw; - u32 i = 0; - - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; - num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + - pbuf->size, pginfo->hwpage_size); - offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / - pginfo->hwpage_size; - while (pginfo->next_hwpage < offs_hw + num_hw) { - /* sanity check */ - if ((pginfo->kpage_cnt >= pginfo->num_kpages) || - (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { - ehca_gen_err("kpage_cnt >= num_kpages, " - "kpage_cnt=%llx num_kpages=%llx " - "hwpage_cnt=%llx " - "num_hwpages=%llx i=%x", - pginfo->kpage_cnt, - pginfo->num_kpages, - pginfo->hwpage_cnt, - pginfo->num_hwpages, i); - return -EFAULT; - } - *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " - "next_hwpage=%llx", pbuf->addr, - pbuf->size, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - if (PAGE_SIZE >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (PAGE_SIZE / pginfo->hwpage_size) == 0) - (pginfo->kpage_cnt)++; - } else - pginfo->kpage_cnt += pginfo->hwpage_size / - PAGE_SIZE; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_hwpage >= offs_hw + num_hw) { - (pginfo->u.phy.next_buf)++; - pginfo->next_hwpage = 0; - } - } - return ret; -} - -static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - u64 *fmrlist; - u32 i; - - /* loop over desired page_list entries */ - fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; - for (i = 0; i < number; i++) { - *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size; - if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%llx fmrlist=%p " - "next_listelem=%llx next_hwpage=%llx", - *fmrlist, fmrlist, - pginfo->u.fmr.next_listelem, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (pginfo->u.fmr.fmr_pgsize / - pginfo->hwpage_size) == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.fmr.next_listelem)++; - fmrlist++; - pginfo->next_hwpage = 0; - } else - (pginfo->next_hwpage)++; - } else { - unsigned int cnt_per_hwpage = pginfo->hwpage_size / - pginfo->u.fmr.fmr_pgsize; - unsigned int j; - u64 prev = *kpage; - /* check if adrs are contiguous */ - for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); - if (prev + pginfo->u.fmr.fmr_pgsize != p) { - ehca_gen_err("uncontiguous fmr pages " - "found prev=%llx p=%llx " - "idx=%x", prev, p, i + j); - return -EINVAL; - } - prev = p; - } - pginfo->kpage_cnt += cnt_per_hwpage; - pginfo->u.fmr.next_listelem += cnt_per_hwpage; - fmrlist += cnt_per_hwpage; - } - kpage++; - } - return ret; -} - -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret; - - switch (pginfo->type) { - case EHCA_MR_PGI_PHYS: - ret = ehca_set_pagebuf_phys(pginfo, number, kpage); - break; - case EHCA_MR_PGI_USER: - ret = PAGE_SIZE >= pginfo->hwpage_size ? - ehca_set_pagebuf_user1(pginfo, number, kpage) : - ehca_set_pagebuf_user2(pginfo, number, kpage); - break; - case EHCA_MR_PGI_FMR: - ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); - break; - default: - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - break; - } - return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ - -/* - * check MR if it is a max-MR, i.e. uses whole memory - * in case it's a max-MR 1 is returned, else 0 - */ -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start) -{ - /* a MR is treated as max-MR only if it fits following: */ - if ((size == ehca_mr_len) && - (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { - ehca_gen_dbg("this is a max-MR"); - return 1; - } else - return 0; -} /* end ehca_mr_is_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* map access control for MR/MW. This routine is used for MR and MW. */ -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl) -{ - *hipz_acl = 0; - if (ib_acl & IB_ACCESS_REMOTE_READ) - *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; - if (ib_acl & IB_ACCESS_REMOTE_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; - if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) - *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; - if (ib_acl & IB_ACCESS_LOCAL_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; - if (ib_acl & IB_ACCESS_MW_BIND) - *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; -} /* end ehca_mrmw_map_acl() */ - -/*----------------------------------------------------------------------*/ - -/* sets page size in hipz access control for MR/MW. */ -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ -{ - *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); -} /* end ehca_mrmw_set_pgsize_hipz_acl() */ - -/*----------------------------------------------------------------------*/ - -/* - * reverse map access control for MR/MW. - * This routine is used for MR and MW. - */ -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl) /*OUT*/ -{ - *ib_acl = 0; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) - *ib_acl |= IB_ACCESS_REMOTE_READ; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) - *ib_acl |= IB_ACCESS_REMOTE_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) - *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; - if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) - *ib_acl |= IB_ACCESS_LOCAL_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) - *ib_acl |= IB_ACCESS_MW_BIND; -} /* end ehca_mrmw_reverse_map_acl() */ - - -/*----------------------------------------------------------------------*/ - -/* - * MR destructor and constructor - * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, - * except struct ib_mr and spinlock - */ -void ehca_mr_deletenew(struct ehca_mr *mr) -{ - mr->flags = 0; - mr->num_kpages = 0; - mr->num_hwpages = 0; - mr->acl = 0; - mr->start = NULL; - mr->fmr_page_size = 0; - mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; - memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); - memset(&mr->galpas, 0, sizeof(mr->galpas)); -} /* end ehca_mr_deletenew() */ - -int ehca_init_mrmw_cache(void) -{ - mr_cache = kmem_cache_create("ehca_cache_mr", - sizeof(struct ehca_mr), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mr_cache) - return -ENOMEM; - mw_cache = kmem_cache_create("ehca_cache_mw", - sizeof(struct ehca_mw), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mw_cache) { - kmem_cache_destroy(mr_cache); - mr_cache = NULL; - return -ENOMEM; - } - return 0; -} - -void ehca_cleanup_mrmw_cache(void) -{ - if (mr_cache) - kmem_cache_destroy(mr_cache); - if (mw_cache) - kmem_cache_destroy(mw_cache); -} - -static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, - int dir) -{ - if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { - ehca_top_bmap->dir[dir] = - kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); - if (!ehca_top_bmap->dir[dir]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); - } - return 0; -} - -static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) -{ - if (!ehca_bmap_valid(ehca_bmap->top[top])) { - ehca_bmap->top[top] = - kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); - if (!ehca_bmap->top[top]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); - } - return ehca_init_top_bmap(ehca_bmap->top[top], dir); -} - -static inline int ehca_calc_index(unsigned long i, unsigned long s) -{ - return (i >> s) & EHCA_INDEX_MASK; -} - -void ehca_destroy_busmap(void) -{ - int top, dir; - - if (!ehca_bmap) - return; - - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - kfree(ehca_bmap->top[top]->dir[dir]); - } - - kfree(ehca_bmap->top[top]); - } - - kfree(ehca_bmap); - ehca_bmap = NULL; -} - -static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) -{ - unsigned long i, start_section, end_section; - int top, dir, idx; - - if (!nr_pages) - return 0; - - if (!ehca_bmap) { - ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); - if (!ehca_bmap) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); - } - - start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; - end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; - for (i = start_section; i < end_section; i++) { - int ret; - top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); - dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); - idx = i & EHCA_INDEX_MASK; - - ret = ehca_init_bmap(ehca_bmap, top, dir); - if (ret) { - ehca_destroy_busmap(); - return ret; - } - ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; - ehca_mr_len += EHCA_SECTSIZE; - } - return 0; -} - -static int ehca_is_hugepage(unsigned long pfn) -{ - int page_order; - - if (pfn & EHCA_HUGEPAGE_PFN_MASK) - return 0; - - page_order = compound_order(pfn_to_page(pfn)); - if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) - return 0; - - return 1; -} - -static int ehca_create_busmap_callback(unsigned long initial_pfn, - unsigned long total_nr_pages, void *arg) -{ - int ret; - unsigned long pfn, start_pfn, end_pfn, nr_pages; - - if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) - return ehca_update_busmap(initial_pfn, total_nr_pages); - - /* Given chunk is >= 16GB -> check for hugepages */ - start_pfn = initial_pfn; - end_pfn = initial_pfn + total_nr_pages; - pfn = start_pfn; - - while (pfn < end_pfn) { - if (ehca_is_hugepage(pfn)) { - /* Add mem found in front of the hugepage */ - nr_pages = pfn - start_pfn; - ret = ehca_update_busmap(start_pfn, nr_pages); - if (ret) - return ret; - /* Skip the hugepage */ - pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); - start_pfn = pfn; - } else - pfn += (EHCA_SECTSIZE / PAGE_SIZE); - } - - /* Add mem found behind the hugepage(s) */ - nr_pages = pfn - start_pfn; - return ehca_update_busmap(start_pfn, nr_pages); -} - -int ehca_create_busmap(void) -{ - int ret; - - ehca_mr_len = 0; - ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, - ehca_create_busmap_callback); - return ret; -} - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int top; - u64 hret, *kpage; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - return -ENOMEM; - } - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); - if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) - break; - } - - ehca_free_fw_ctrlblock(kpage); - - if (hret == H_SUCCESS) - return 0; /* Everything is fine */ - else { - ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " - "h_ret=%lli e_mr=%p top=%x lkey=%x " - "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - return ehca2ib_return_code(hret); - } -} - -static u64 ehca_map_vaddr(void *caddr) -{ - int top, dir, idx; - unsigned long abs_addr, offset; - u64 entry; - - if (!ehca_bmap) - return EHCA_INVAL_ADDR; - - abs_addr = __pa(caddr); - top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top])) - return EHCA_INVAL_ADDR; - - dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - return EHCA_INVAL_ADDR; - - idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); - - entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; - if (ehca_bmap_valid(entry)) { - offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); - return entry | offset; - } else - return EHCA_INVAL_ADDR; -} - -static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return dma_addr == EHCA_INVAL_ADDR; -} - -static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, - size_t size, enum dma_data_direction direction) -{ - if (cpu_addr) - return ehca_map_vaddr(cpu_addr); - else - return EHCA_INVAL_ADDR; -} - -static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - u64 addr; - - if (offset + size > PAGE_SIZE) - return EHCA_INVAL_ADDR; - - addr = ehca_map_vaddr(page_address(page)); - if (!ehca_dma_mapping_error(dev, addr)) - addr += offset; - - return addr; -} - -static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) { - u64 addr; - addr = ehca_map_vaddr(sg_virt(sg)); - if (ehca_dma_mapping_error(dev, addr)) - return 0; - - sg->dma_address = addr; - sg->dma_length = sg->length; - } - return nents; -} - -static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); -} - -static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, - u64 *dma_handle, gfp_t flag) -{ - struct page *p; - void *addr = NULL; - u64 dma_addr; - - p = alloc_pages(flag, get_order(size)); - if (p) { - addr = page_address(p); - dma_addr = ehca_map_vaddr(addr); - if (ehca_dma_mapping_error(dev, dma_addr)) { - free_pages((unsigned long)addr, get_order(size)); - return NULL; - } - if (dma_handle) - *dma_handle = dma_addr; - return addr; - } - return NULL; -} - -static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, u64 dma_handle) -{ - if (cpu_addr && size) - free_pages((unsigned long)cpu_addr, get_order(size)); -} - - -struct ib_dma_mapping_ops ehca_dma_mapping_ops = { - .mapping_error = ehca_dma_mapping_error, - .map_single = ehca_dma_map_single, - .unmap_single = ehca_dma_unmap_single, - .map_page = ehca_dma_map_page, - .unmap_page = ehca_dma_unmap_page, - .map_sg = ehca_dma_map_sg, - .unmap_sg = ehca_dma_unmap_sg, - .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, - .sync_single_for_device = ehca_dma_sync_single_for_device, - .alloc_coherent = ehca_dma_alloc_coherent, - .free_coherent = ehca_dma_free_coherent, -}; diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_mrmw.h b/kernel/drivers/infiniband/hw/ehca/ehca_mrmw.h deleted file mode 100644 index 50d8b5130..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW declarations and inline functions - * - * Authors: Dietmar Decker <ddecker@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _EHCA_MRMW_H_ -#define _EHCA_MRMW_H_ - -enum ehca_reg_type { - EHCA_REG_MR, - EHCA_REG_BUSMAP_MR -}; - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey, - enum ehca_reg_type reg_type); - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo); - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int mr_access_flags, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey); - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr); - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_reg_internal_maxmr(struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **maxmr); - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca); - -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size); - -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len); - -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage); - -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start); - -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl); - -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); - -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl); - -void ehca_mr_deletenew(struct ehca_mr *mr); - -int ehca_create_busmap(void); - -void ehca_destroy_busmap(void); - -extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; -#endif /*_EHCA_MRMW_H_*/ diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_pd.c b/kernel/drivers/infiniband/hw/ehca/ehca_pd.c deleted file mode 100644 index 351577a66..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_pd.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * PD functions - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> - -#include "ehca_tools.h" -#include "ehca_iverbs.h" - -static struct kmem_cache *pd_cache; - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, struct ib_udata *udata) -{ - struct ehca_pd *pd; - int i; - - pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); - if (!pd) { - ehca_err(device, "device=%p context=%p out of memory", - device, context); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < 2; i++) { - INIT_LIST_HEAD(&pd->free[i]); - INIT_LIST_HEAD(&pd->full[i]); - } - mutex_init(&pd->lock); - - /* - * Kernel PD: when device = -1, 0 - * User PD: when context != -1 - */ - if (!context) { - /* - * Kernel PDs after init reuses always - * the one created in ehca_shca_reopen() - */ - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - pd->fw_pd.value = shca->pd->fw_pd.value; - } else - pd->fw_pd.value = (u64)pd; - - return &pd->ib_pd; -} - -int ehca_dealloc_pd(struct ib_pd *pd) -{ - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - int i, leftovers = 0; - struct ipz_small_queue_page *page, *tmp; - - for (i = 0; i < 2; i++) { - list_splice(&my_pd->full[i], &my_pd->free[i]); - list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { - leftovers = 1; - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } - } - - if (leftovers) - ehca_warn(pd->device, - "Some small queue pages were not freed"); - - kmem_cache_free(pd_cache, my_pd); - - return 0; -} - -int ehca_init_pd_cache(void) -{ - pd_cache = kmem_cache_create("ehca_cache_pd", - sizeof(struct ehca_pd), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!pd_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_pd_cache(void) -{ - if (pd_cache) - kmem_cache_destroy(pd_cache); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_qes.h b/kernel/drivers/infiniband/hw/ehca/ehca_qes.h deleted file mode 100644 index 90c4efa67..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_qes.h +++ /dev/null @@ -1,260 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Hardware request structures - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef _EHCA_QES_H_ -#define _EHCA_QES_H_ - -#include "ehca_tools.h" - -/* virtual scatter gather entry to specify remote addresses with length */ -struct ehca_vsgentry { - u64 vaddr; - u32 lkey; - u32 length; -}; - -#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) - -/* - * Unreliable Datagram Address Vector Format - * see IBTA Vol1 chapter 8.3 Global Routing Header - */ -struct ehca_ud_av { - u8 sl; - u8 lnh; - u16 dlid; - u8 reserved1; - u8 reserved2; - u8 reserved3; - u8 slid_path_bits; - u8 reserved4; - u8 ipd; - u8 reserved5; - u8 pmtu; - u32 reserved6; - u64 reserved7; - union { - struct { - u64 word_0; /* always set to 6 */ - /*should be 0x1B for IB transport */ - u64 word_1; - u64 word_2; - u64 word_3; - u64 word_4; - } grh; - struct { - u32 wd_0; - u32 wd_1; - /* DWord_1 --> SGID */ - - u32 sgid_wd3; - u32 sgid_wd2; - - u32 sgid_wd1; - u32 sgid_wd0; - /* DWord_3 --> DGID */ - - u32 dgid_wd3; - u32 dgid_wd2; - - u32 dgid_wd1; - u32 dgid_wd0; - } grh_l; - }; -}; - -/* maximum number of sg entries allowed in a WQE */ -#define MAX_WQE_SG_ENTRIES 252 - -#define WQE_OPTYPE_SEND 0x80 -#define WQE_OPTYPE_RDMAREAD 0x40 -#define WQE_OPTYPE_RDMAWRITE 0x20 -#define WQE_OPTYPE_CMPSWAP 0x10 -#define WQE_OPTYPE_FETCHADD 0x08 -#define WQE_OPTYPE_BIND 0x04 - -#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 -#define WQE_WRFLAG_FENCE 0x40 -#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 -#define WQE_WRFLAG_SOLIC_EVENT 0x10 - -#define WQEF_CACHE_HINT 0x80 -#define WQEF_CACHE_HINT_RD_WR 0x40 -#define WQEF_TIMED_WQE 0x20 -#define WQEF_PURGE 0x08 -#define WQEF_HIGH_NIBBLE 0xF0 - -#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 -#define MW_BIND_ACCESSCTRL_R_READ 0x20 -#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 - -struct ehca_wqe { - u64 work_request_id; - u8 optype; - u8 wr_flag; - u16 pkeyi; - u8 wqef; - u8 nr_of_data_seg; - u16 wqe_provided_slid; - u32 destination_qp_number; - u32 resync_psn_sqp; - u32 local_ee_context_qkey; - u32 immediate_data; - union { - struct { - u64 remote_virtual_address; - u32 rkey; - u32 reserved; - u64 atomic_1st_op_dma_len; - u64 atomic_2nd_op; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - - } nud; - struct { - u64 ehca_ud_av_ptr; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } ud_avp; - struct { - struct ehca_ud_av ud_av; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - - 2]; - } ud_av; - struct { - u64 reserved0; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } all_rcv; - - struct { - u64 reserved; - u32 rkey; - u32 old_rkey; - u64 reserved1; - u64 reserved2; - u64 virtual_address; - u32 reserved3; - u32 length; - u32 reserved4; - u16 reserved5; - u8 reserved6; - u8 lr_ctl; - u32 lkey; - u32 reserved7; - u64 reserved8; - u64 reserved9; - u64 reserved10; - u64 reserved11; - } bind; - struct { - u64 reserved12; - u64 reserved13; - u32 size; - u32 start; - } inline_data; - } u; - -}; - -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) -#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) -#define WC_STATUS_ERROR_BIT 0x80000000 -#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 -#define WC_STATUS_PURGE_BIT 0x10 -#define WC_SEND_RECEIVE_BIT 0x80 - -struct ehca_cqe { - u64 work_request_id; - u8 optype; - u8 w_completion_flags; - u16 reserved1; - u32 nr_bytes_transferred; - u32 immediate_data; - u32 local_qp_number; - u8 freed_resource_count; - u8 service_level; - u16 wqe_count; - u32 qp_token; - u32 qkey_ee_token; - u32 remote_qp_number; - u16 dlid; - u16 rlid; - u16 reserved2; - u16 pkey_index; - u32 cqe_timestamp; - u32 wqe_timestamp; - u8 wqe_timestamp_valid; - u8 reserved3; - u8 reserved4; - u8 cqe_flags; - u32 status; -}; - -struct ehca_eqe { - u64 entry; -}; - -struct ehca_mrte { - u64 starting_va; - u64 length; /* length of memory region in bytes*/ - u32 pd; - u8 key_instance; - u8 pagesize; - u8 mr_control; - u8 local_remote_access_ctrl; - u8 reserved[0x20 - 0x18]; - u64 at_pointer[4]; -}; -#endif /*_EHCA_QES_H_*/ diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_qp.c b/kernel/drivers/infiniband/hw/ehca/ehca_qp.c deleted file mode 100644 index 2e89356c4..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_qp.c +++ /dev/null @@ -1,2257 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * QP functions - * - * Authors: Joachim Fenkes <fenkes@de.ibm.com> - * Stefan Roscher <stefan.roscher@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -static struct kmem_cache *qp_cache; - -/* - * attributes not supported by query qp - */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ - IB_QP_EN_SQD_ASYNC_NOTIFY) - -/* - * ehca (internal) qp state values - */ -enum ehca_qp_state { - EHCA_QPS_RESET = 1, - EHCA_QPS_INIT = 2, - EHCA_QPS_RTR = 3, - EHCA_QPS_RTS = 5, - EHCA_QPS_SQD = 6, - EHCA_QPS_SQE = 8, - EHCA_QPS_ERR = 128 -}; - -/* - * qp state transitions as defined by IB Arch Rel 1.1 page 431 - */ -enum ib_qp_statetrans { - IB_QPST_ANY2RESET, - IB_QPST_ANY2ERR, - IB_QPST_RESET2INIT, - IB_QPST_INIT2RTR, - IB_QPST_INIT2INIT, - IB_QPST_RTR2RTS, - IB_QPST_RTS2SQD, - IB_QPST_RTS2RTS, - IB_QPST_SQD2RTS, - IB_QPST_SQE2RTS, - IB_QPST_SQD2SQD, - IB_QPST_MAX /* nr of transitions, this must be last!!! */ -}; - -/* - * ib2ehca_qp_state maps IB to ehca qp_state - * returns ehca qp state corresponding to given ib qp state - */ -static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) -{ - switch (ib_qp_state) { - case IB_QPS_RESET: - return EHCA_QPS_RESET; - case IB_QPS_INIT: - return EHCA_QPS_INIT; - case IB_QPS_RTR: - return EHCA_QPS_RTR; - case IB_QPS_RTS: - return EHCA_QPS_RTS; - case IB_QPS_SQD: - return EHCA_QPS_SQD; - case IB_QPS_SQE: - return EHCA_QPS_SQE; - case IB_QPS_ERR: - return EHCA_QPS_ERR; - default: - ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); - return -EINVAL; - } -} - -/* - * ehca2ib_qp_state maps ehca to IB qp_state - * returns ib qp state corresponding to given ehca qp state - */ -static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state - ehca_qp_state) -{ - switch (ehca_qp_state) { - case EHCA_QPS_RESET: - return IB_QPS_RESET; - case EHCA_QPS_INIT: - return IB_QPS_INIT; - case EHCA_QPS_RTR: - return IB_QPS_RTR; - case EHCA_QPS_RTS: - return IB_QPS_RTS; - case EHCA_QPS_SQD: - return IB_QPS_SQD; - case EHCA_QPS_SQE: - return IB_QPS_SQE; - case EHCA_QPS_ERR: - return IB_QPS_ERR; - default: - ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); - return -EINVAL; - } -} - -/* - * ehca_qp_type used as index for req_attr and opt_attr of - * struct ehca_modqp_statetrans - */ -enum ehca_qp_type { - QPT_RC = 0, - QPT_UC = 1, - QPT_UD = 2, - QPT_SQP = 3, - QPT_MAX -}; - -/* - * ib2ehcaqptype maps Ib to ehca qp_type - * returns ehca qp type corresponding to ib qp type - */ -static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return QPT_SQP; - case IB_QPT_RC: - return QPT_RC; - case IB_QPT_UC: - return QPT_UC; - case IB_QPT_UD: - return QPT_UD; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, - int ib_tostate) -{ - int index = -EINVAL; - switch (ib_tostate) { - case IB_QPS_RESET: - index = IB_QPST_ANY2RESET; - break; - case IB_QPS_INIT: - switch (ib_fromstate) { - case IB_QPS_RESET: - index = IB_QPST_RESET2INIT; - break; - case IB_QPS_INIT: - index = IB_QPST_INIT2INIT; - break; - } - break; - case IB_QPS_RTR: - if (ib_fromstate == IB_QPS_INIT) - index = IB_QPST_INIT2RTR; - break; - case IB_QPS_RTS: - switch (ib_fromstate) { - case IB_QPS_RTR: - index = IB_QPST_RTR2RTS; - break; - case IB_QPS_RTS: - index = IB_QPST_RTS2RTS; - break; - case IB_QPS_SQD: - index = IB_QPST_SQD2RTS; - break; - case IB_QPS_SQE: - index = IB_QPST_SQE2RTS; - break; - } - break; - case IB_QPS_SQD: - if (ib_fromstate == IB_QPS_RTS) - index = IB_QPST_RTS2SQD; - break; - case IB_QPS_SQE: - break; - case IB_QPS_ERR: - index = IB_QPST_ANY2ERR; - break; - default: - break; - } - return index; -} - -/* - * ibqptype2servicetype returns hcp service type corresponding to given - * ib qp type used by create_qp() - */ -static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return ST_UD; - case IB_QPT_RC: - return ST_RC; - case IB_QPT_UC: - return ST_UC; - case IB_QPT_UD: - return ST_UD; - case IB_QPT_RAW_IPV6: - return -EINVAL; - case IB_QPT_RAW_ETHERTYPE: - return -EINVAL; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -/* - * init userspace queue info from ipz_queue data - */ -static inline void queue2resp(struct ipzu_queue_resp *resp, - struct ipz_queue *queue) -{ - resp->qe_size = queue->qe_size; - resp->act_nr_of_sg = queue->act_nr_of_sg; - resp->queue_length = queue->queue_length; - resp->pagesize = queue->pagesize; - resp->toggle_state = queue->toggle_state; - resp->offset = queue->offset; -} - -/* - * init_qp_queue initializes/constructs r/squeue and registers queue pages. - */ -static inline int init_qp_queue(struct ehca_shca *shca, - struct ehca_pd *pd, - struct ehca_qp *my_qp, - struct ipz_queue *queue, - int q_type, - u64 expected_hret, - struct ehca_alloc_queue_parms *parms, - int wqe_size) -{ - int ret, cnt, ipz_rc, nr_q_pages; - void *vpage; - u64 rpage, h_ret; - struct ib_device *ib_dev = &shca->ib_device; - struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - - if (!parms->queue_size) - return 0; - - if (parms->is_small) { - nr_q_pages = 1; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - 128 << parms->page_size, - wqe_size, parms->act_nr_sges, 1); - } else { - nr_q_pages = parms->queue_size; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - EHCA_PAGESIZE, wqe_size, - parms->act_nr_sges, 0); - } - - if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", - ipz_rc); - return -EBUSY; - } - - /* register queue pages */ - for (cnt = 0; cnt < nr_q_pages; cnt++) { - vpage = ipz_qpageit_get_inc(queue); - if (!vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "failed p_vpage= %p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - NULL, 0, q_type, - rpage, parms->is_small ? 0 : 1, - my_qp->galpas.kernel); - if (cnt == (nr_q_pages - 1)) { /* last page! */ - if (h_ret != expected_hret) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "should not succeed vpage=%p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - } - } - - ipz_qeit_reset(queue); - - return 0; - -init_qp_queue1: - ipz_queue_dtor(pd, queue); - return ret; -} - -static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) -{ - if (is_llqp) - return 128 << act_nr_sge; - else - return offsetof(struct ehca_wqe, - u.nud.sg_list[act_nr_sge]); -} - -static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, - int req_nr_sge, int is_llqp) -{ - u32 wqe_size, q_size; - int act_nr_sge = req_nr_sge; - - if (!is_llqp) - /* round up #SGEs so WQE size is a power of 2 */ - for (act_nr_sge = 4; act_nr_sge <= 252; - act_nr_sge = 4 + 2 * act_nr_sge) - if (act_nr_sge >= req_nr_sge) - break; - - wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); - q_size = wqe_size * (queue->max_wr + 1); - - if (q_size <= 512) - queue->page_size = 2; - else if (q_size <= 1024) - queue->page_size = 3; - else - queue->page_size = 0; - - queue->is_small = (queue->page_size != 0); -} - -/* needs to be called with cq->spinlock held */ -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) -{ - struct list_head *list, *node; - - /* TODO: support low latency QPs */ - if (qp->ext_type == EQPT_LLQP) - return; - - if (on_sq) { - list = &qp->send_cq->sqp_err_list; - node = &qp->sq_err_node; - } else { - list = &qp->recv_cq->rqp_err_list; - node = &qp->rq_err_node; - } - - if (list_empty(node)) - list_add_tail(node, list); - - return; -} - -static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) -{ - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - - if (!list_empty(node)) - list_del_init(node); - - spin_unlock_irqrestore(&cq->spinlock, flags); -} - -static void reset_queue_map(struct ehca_queue_map *qmap) -{ - int i; - - qmap->tail = qmap->entries - 1; - qmap->left_to_poll = 0; - qmap->next_wqe_idx = 0; - for (i = 0; i < qmap->entries; i++) { - qmap->map[i].reported = 1; - qmap->map[i].cqe_req = 0; - } -} - -/* - * Create an ib_qp struct that is either a QP or an SRQ, depending on - * the value of the is_srq parameter. If init_attr and srq_init_attr share - * fields, the field out of init_attr is used. - */ -static struct ehca_qp *internal_create_qp( - struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata, int is_srq) -{ - struct ehca_qp *my_qp, *my_srq = NULL; - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct ib_ucontext *context = NULL; - u64 h_ret; - int is_llqp = 0, has_srq = 0, is_user = 0; - int qp_type, max_send_sge, max_recv_sge, ret; - - /* h_call's out parameters */ - struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; - unsigned long flags; - - if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { - ehca_err(pd->device, "Unable to create QP, max number of %i " - "QPs reached.", shca->max_num_qps); - ehca_err(pd->device, "To increase the maximum number of QPs " - "use the number_of_qps module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - if (init_attr->create_flags) { - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - memset(&parms, 0, sizeof(parms)); - qp_type = init_attr->qp_type; - - if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && - init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { - ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", - init_attr->sq_sig_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* save LLQP info */ - if (qp_type & 0x80) { - is_llqp = 1; - parms.ext_type = EQPT_LLQP; - parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; - } - qp_type &= 0x1F; - init_attr->qp_type &= 0x1F; - - /* handle SRQ base QPs */ - if (init_attr->srq) { - my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); - - if (qp_type == IB_QPT_UC) { - ehca_err(pd->device, "UC with SRQ not supported"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - has_srq = 1; - parms.ext_type = EQPT_SRQBASE; - parms.srq_qpn = my_srq->real_qp_num; - } - - if (is_llqp && has_srq) { - ehca_err(pd->device, "LLQPs can't have an SRQ"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* handle SRQs */ - if (is_srq) { - parms.ext_type = EQPT_SRQ; - parms.srq_limit = srq_init_attr->attr.srq_limit; - if (init_attr->cap.max_recv_sge > 3) { - ehca_err(pd->device, "no more than three SGEs " - "supported for SRQ pd=%p max_sge=%x", - pd, init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - /* check QP type */ - if (qp_type != IB_QPT_UD && - qp_type != IB_QPT_UC && - qp_type != IB_QPT_RC && - qp_type != IB_QPT_SMI && - qp_type != IB_QPT_GSI) { - ehca_err(pd->device, "wrong QP Type=%x", qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - if (is_llqp) { - switch (qp_type) { - case IB_QPT_RC: - if ((init_attr->cap.max_send_wr > 255) || - (init_attr->cap.max_recv_wr > 255)) { - ehca_err(pd->device, - "Invalid Number of max_sq_wr=%x " - "or max_rq_wr=%x for RC LLQP", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - case IB_QPT_UD: - if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { - ehca_err(pd->device, "UD LLQP not supported " - "by this adapter"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOSYS); - } - if (!(init_attr->cap.max_send_sge <= 5 - && init_attr->cap.max_send_sge >= 1 - && init_attr->cap.max_recv_sge <= 5 - && init_attr->cap.max_recv_sge >= 1)) { - ehca_err(pd->device, - "Invalid Number of max_send_sge=%x " - "or max_recv_sge=%x for UD LLQP", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } else if (init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of " - "max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - default: - ehca_err(pd->device, "unsupported LL QP Type=%x", - qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } else { - int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI - || qp_type == IB_QPT_GSI) ? 250 : 252; - - if (init_attr->cap.max_send_sge > max_sge - || init_attr->cap.max_recv_sge > max_sge) { - ehca_err(pd->device, "Invalid number of SGEs requested " - "send_sge=%x recv_sge=%x max_sge=%x", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge, max_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); - if (!my_qp) { - ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOMEM); - } - - if (pd->uobject && udata) { - is_user = 1; - context = pd->uobject->context; - } - - atomic_set(&my_qp->nr_events, 0); - init_waitqueue_head(&my_qp->wait_completion); - spin_lock_init(&my_qp->spinlock_s); - spin_lock_init(&my_qp->spinlock_r); - my_qp->qp_type = qp_type; - my_qp->ext_type = parms.ext_type; - my_qp->state = IB_QPS_RESET; - - if (init_attr->recv_cq) - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - if (init_attr->send_cq) - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - - ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); - if (ret >= 0) - my_qp->token = ret; - - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - idr_preload_end(); - if (ret < 0) { - if (ret == -ENOSPC) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - } else { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); - } - goto create_qp_exit0; - } - - if (has_srq) - parms.srq_token = my_qp->token; - - parms.servicetype = ibqptype2servicetype(qp_type); - if (parms.servicetype < 0) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", qp_type); - goto create_qp_exit1; - } - - /* Always signal by WQE so we can hide circ. WQEs */ - parms.sigtype = HCALL_SIGT_BY_WQE; - - /* UD_AV CIRCUMVENTION */ - max_send_sge = init_attr->cap.max_send_sge; - max_recv_sge = init_attr->cap.max_recv_sge; - if (parms.servicetype == ST_UD && !is_llqp) { - max_send_sge += 2; - max_recv_sge += 2; - } - - parms.token = my_qp->token; - parms.eq_handle = shca->eq.ipz_eq_handle; - parms.pd = my_pd->fw_pd; - if (my_qp->send_cq) - parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; - if (my_qp->recv_cq) - parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; - - parms.squeue.max_wr = init_attr->cap.max_send_wr; - parms.rqueue.max_wr = init_attr->cap.max_recv_wr; - parms.squeue.max_sge = max_send_sge; - parms.rqueue.max_sge = max_recv_sge; - - /* RC QPs need one more SWQE for unsolicited ack circumvention */ - if (qp_type == IB_QPT_RC) - parms.squeue.max_wr++; - - if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { - if (HAS_SQ(my_qp)) - ehca_determine_small_queue( - &parms.squeue, max_send_sge, is_llqp); - if (HAS_RQ(my_qp)) - ehca_determine_small_queue( - &parms.rqueue, max_recv_sge, is_llqp); - parms.qp_storage = - (parms.squeue.is_small || parms.rqueue.is_small); - } - - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", - h_ret); - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit1; - } - - ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; - my_qp->ipz_qp_handle = parms.qp_handle; - my_qp->galpas = parms.galpas; - - swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); - rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); - - switch (qp_type) { - case IB_QPT_RC: - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } - /* hide the extra WQE */ - parms.squeue.act_nr_wqes--; - break; - case IB_QPT_UD: - case IB_QPT_GSI: - case IB_QPT_SMI: - /* UD circumvention */ - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } else { - parms.squeue.act_nr_sges -= 2; - parms.rqueue.act_nr_sges -= 2; - } - - if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { - parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; - parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; - parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; - parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; - ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; - } - - break; - - default: - break; - } - - /* initialize r/squeue and register queue pages */ - if (HAS_SQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, - HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, - &parms.squeue, swqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize squeue " - "and pages ret=%i", ret); - goto create_qp_exit2; - } - - if (!is_user) { - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; - } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); - } - } - - if (HAS_RQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, - H_SUCCESS, &parms.rqueue, rwqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize rqueue " - "and pages ret=%i", ret); - goto create_qp_exit4; - } - if (!is_user) { - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; - } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } - } else if (init_attr->srq && !is_user) { - /* this is a base QP, use the queue map of the SRQ */ - my_qp->rq_map = my_srq->rq_map; - INIT_LIST_HEAD(&my_qp->rq_err_node); - - my_qp->ipz_rqueue = my_srq->ipz_rqueue; - } - - if (is_srq) { - my_qp->ib_srq.pd = &my_pd->ib_pd; - my_qp->ib_srq.device = my_pd->ib_pd.device; - - my_qp->ib_srq.srq_context = init_attr->qp_context; - my_qp->ib_srq.event_handler = init_attr->event_handler; - } else { - my_qp->ib_qp.qp_num = ib_qp_num; - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; - - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = init_attr->send_cq; - - my_qp->ib_qp.qp_type = qp_type; - my_qp->ib_qp.srq = init_attr->srq; - - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; - } - - init_attr->cap.max_inline_data = 0; /* not supported yet */ - init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; - init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; - init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; - init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; - my_qp->init_attr = *init_attr; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - &my_qp->ib_qp; - if (ehca_nr_ports < 0) { - /* alloc array to cache subsequent modify qp parms - * for autodetect mode - */ - my_qp->mod_qp_parm = - kzalloc(EHCA_MOD_QP_PARM_MAX * - sizeof(*my_qp->mod_qp_parm), - GFP_KERNEL); - if (!my_qp->mod_qp_parm) { - ehca_err(pd->device, - "Could not alloc mod_qp_parm"); - goto create_qp_exit5; - } - } - } - - /* NOTE: define_apq0() not supported yet */ - if (qp_type == IB_QPT_GSI) { - h_ret = ehca_define_sqp(shca, my_qp, init_attr); - if (h_ret != H_SUCCESS) { - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - /* the QP pointer is no longer valid */ - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - NULL; - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit6; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); - if (ret) { - ehca_err(pd->device, - "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit7; - } - } - - /* copy queues, galpa data to user space */ - if (context && udata) { - struct ehca_create_qp_resp resp; - memset(&resp, 0, sizeof(resp)); - - resp.qp_num = my_qp->real_qp_num; - resp.token = my_qp->token; - resp.qp_type = my_qp->qp_type; - resp.ext_type = my_qp->ext_type; - resp.qkey = my_qp->qkey; - resp.real_qp_num = my_qp->real_qp_num; - - if (HAS_SQ(my_qp)) - queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); - if (HAS_RQ(my_qp)) - queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); - resp.fw_handle_ofs = (u32) - (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); - - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { - ehca_err(pd->device, "Copy to udata failed"); - ret = -EINVAL; - goto create_qp_exit8; - } - } - - return my_qp; - -create_qp_exit8: - ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); - -create_qp_exit7: - kfree(my_qp->mod_qp_parm); - -create_qp_exit6: - if (HAS_RQ(my_qp) && !is_user) - vfree(my_qp->rq_map.map); - -create_qp_exit5: - if (HAS_RQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - -create_qp_exit4: - if (HAS_SQ(my_qp) && !is_user) - vfree(my_qp->sq_map.map); - -create_qp_exit3: - if (HAS_SQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - -create_qp_exit2: - hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - -create_qp_exit1: - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - -create_qp_exit0: - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return ERR_PTR(ret); -} - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) -{ - struct ehca_qp *ret; - - ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct ib_qp_init_attr qp_init_attr; - struct ehca_qp *my_qp; - struct ib_srq *ret; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 hret, update_mask; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); - - /* For common attributes, internal_create_qp() takes its info - * out of qp_init_attr, so copy all common attrs there. - */ - memset(&qp_init_attr, 0, sizeof(qp_init_attr)); - qp_init_attr.event_handler = srq_init_attr->event_handler; - qp_init_attr.qp_context = srq_init_attr->srq_context; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.qp_type = IB_QPT_RC; - qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; - qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; - - my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); - if (IS_ERR(my_qp)) - return (struct ib_srq *)my_qp; - - /* copy back return values */ - srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = 3; - - /* drive SRQ into RTR state */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(pd->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - ret = ERR_PTR(-ENOMEM); - goto create_srq1; - } - - mqpcb->qp_state = EHCA_QPS_INIT; - mqpcb->prim_phys_port = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_enable = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_state = EHCA_QPS_RTR; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - ehca_free_fw_ctrlblock(mqpcb); - - return &my_qp->ib_srq; - -create_srq2: - ret = ERR_PTR(ehca2ib_return_code(hret)); - ehca_free_fw_ctrlblock(mqpcb); - -create_srq1: - internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); - - return ret; -} - -/* - * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts - * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe - * returns total number of bad wqes in bad_wqe_cnt - */ -static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, - int *bad_wqe_cnt) -{ - u64 h_ret; - struct ipz_queue *squeue; - void *bad_send_wqe_p, *bad_send_wqe_v; - u64 q_ofs; - struct ehca_wqe *wqe; - int qp_num = my_qp->ib_qp.qp_num; - - /* get send wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &bad_send_wqe_p, NULL, 2); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" - " ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); - ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", - qp_num, bad_send_wqe_p); - /* convert wqe pointer to vadr */ - bad_send_wqe_v = __va((u64)bad_send_wqe_p); - if (ehca_debug_level >= 2) - ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); - squeue = &my_qp->ipz_squeue; - if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { - ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x" - " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); - return -EFAULT; - } - - /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); - *bad_wqe_cnt = 0; - while (wqe->optype != 0xff && wqe->wqef != 0xff) { - if (ehca_debug_level >= 2) - ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); - wqe->nr_of_data_seg = 0; /* suppress data access */ - wqe->wqef = WQEF_PURGE; /* WQE to be purged */ - q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); - *bad_wqe_cnt = (*bad_wqe_cnt)+1; - } - /* - * bad wqe will be reprocessed and ignored when pol_cq() is called, - * i.e. nr of wqes with flush error status is one less - */ - ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", - qp_num, (*bad_wqe_cnt)-1); - wqe->wqef = 0; - - return 0; -} - -static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, - struct ehca_queue_map *qmap) -{ - void *wqe_v; - u64 q_ofs; - u32 wqe_idx; - unsigned int tail_idx; - - /* convert real to abs address */ - wqe_p = wqe_p & (~(1UL << 63)); - - wqe_v = __va(wqe_p); - - if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { - ehca_gen_err("Invalid offset for calculating left cqes " - "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); - return -EFAULT; - } - - tail_idx = next_index(qmap->tail, qmap->entries); - wqe_idx = q_ofs / ipz_queue->qe_size; - - /* check all processed wqes, whether a cqe is requested or not */ - while (tail_idx != wqe_idx) { - if (qmap->map[tail_idx].cqe_req) - qmap->left_to_poll++; - tail_idx = next_index(tail_idx, qmap->entries); - } - /* save index in queue, where we have to start flushing */ - qmap->next_wqe_idx = wqe_idx; - return 0; -} - -static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) -{ - u64 h_ret; - void *send_wqe_p, *recv_wqe_p; - int ret; - unsigned long flags; - int qp_num = my_qp->ib_qp.qp_num; - - /* this hcall is not supported on base QPs */ - if (my_qp->ext_type != EQPT_SRQBASE) { - /* get send and receive wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &send_wqe_p, &recv_wqe_p, 4); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "disable_and_get_wqe() " - "failed ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - - /* - * acquire lock to ensure that nobody is polling the cq which - * could mean that the qmap->tail pointer is in an - * inconsistent state. - */ - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, - &my_qp->sq_map); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - if (ret) - return ret; - - - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, - &my_qp->rq_map); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); - if (ret) - return ret; - } else { - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - my_qp->sq_map.left_to_poll = 0; - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - my_qp->rq_map.left_to_poll = 0; - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); - } - - /* this assures flush cqes being generated only for pending wqes */ - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - ehca_add_to_err_list(my_qp, 1); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - - if (HAS_RQ(my_qp)) { - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - ehca_add_to_err_list(my_qp, 0); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, - flags); - } - } - - return 0; -} - -/* - * internal_modify_qp with circumvention to handle aqp0 properly - * smi_reset2init indicates if this is an internal reset-to-init-call for - * smi. This flag must always be zero if called from ehca_modify_qp()! - * This internal func was intorduced to avoid recursion of ehca_modify_qp()! - */ -static int internal_modify_qp(struct ib_qp *ibqp, - struct ib_qp_attr *attr, - int attr_mask, int smi_reset2init) -{ - enum ib_qp_state qp_cur_state, qp_new_state; - int cnt, qp_attr_idx, ret = 0; - enum ib_qp_statetrans statetrans; - struct hcp_modify_qp_control_block *mqpcb; - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = - container_of(ibqp->pd->device, struct ehca_shca, ib_device); - u64 update_mask; - u64 h_ret; - int bad_wqe_cnt = 0; - int is_user = 0; - int squeue_locked = 0; - unsigned long flags = 0; - - /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!mqpcb) { - ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - mqpcb, my_qp->galpas.kernel); - if (h_ret != H_SUCCESS) { - ehca_err(ibqp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, ibqp->qp_num, h_ret); - ret = ehca2ib_return_code(h_ret); - goto modify_qp_exit1; - } - if (ibqp->uobject) - is_user = 1; - - qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); - - if (qp_cur_state == -EINVAL) { /* invalid qp state */ - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - mqpcb->qp_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - /* - * circumvention to set aqp0 initial state to init - * as expected by IB spec - */ - if (smi_reset2init == 0 && - ibqp->qp_type == IB_QPT_SMI && - qp_cur_state == IB_QPS_RESET && - (attr_mask & IB_QP_STATE) && - attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ - struct ib_qp_attr smiqp_attr = { - .qp_state = IB_QPS_INIT, - .port_num = my_qp->init_attr.port_num, - .pkey_index = 0, - .qkey = 0 - }; - int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | - IB_QP_PKEY_INDEX | IB_QP_QKEY; - int smirc = internal_modify_qp( - ibqp, &smiqp_attr, smiqp_attr_mask, 1); - if (smirc) { - ehca_err(ibqp->device, "SMI RESET -> INIT failed. " - "ehca_modify_qp() rc=%i", smirc); - ret = H_PARAMETER; - goto modify_qp_exit1; - } - qp_cur_state = IB_QPS_INIT; - ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); - } - /* is transmitted current state equal to "real" current state */ - if ((attr_mask & IB_QP_CUR_STATE) && - qp_cur_state != attr->cur_qp_state) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" - " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", - attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " - "new qp_state=%x attribute_mask=%x", - my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); - - qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; - if (!smi_reset2init && - !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid qp transition new_state=%x cur_state=%x " - "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, - qp_cur_state, my_qp, ibqp->qp_num, attr_mask); - goto modify_qp_exit1; - } - - mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); - if (mqpcb->qp_state) - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - else { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid new qp state=%x " - "ehca_qp=%p qp_num=%x", - qp_new_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - /* retrieve state transition struct to get req and opt attrs */ - statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); - if (statetrans < 0) { - ret = -EINVAL; - ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x " - "new_qp_state=%x State_xsition=%x ehca_qp=%p " - "qp_num=%x", qp_cur_state, qp_new_state, - statetrans, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); - - if (qp_attr_idx < 0) { - ret = qp_attr_idx; - ehca_err(ibqp->device, - "Invalid QP type=%x ehca_qp=%p qp_num=%x", - ibqp->qp_type, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, - "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x", - my_qp, ibqp->qp_num, statetrans); - - /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set - * in non-LL UD QPs. - */ - if ((my_qp->qp_type == IB_QPT_UD) && - (my_qp->ext_type != EQPT_LLQP) && - (statetrans == IB_QPST_INIT2RTR) && - (shca->hw_level >= 0x22)) { - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - mqpcb->send_grh_flag = 1; - } - - /* sqe -> rts: set purge bit of bad wqe before actual trans */ - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* mark next free wqe if kernel */ - if (!ibqp->uobject) { - struct ehca_wqe *wqe; - /* lock send queue */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - squeue_locked = 1; - /* mark next free wqe */ - wqe = (struct ehca_wqe *) - ipz_qeit_get(&my_qp->ipz_squeue); - wqe->optype = wqe->wqef = 0xff; - ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", - ibqp->qp_num, wqe); - } - ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); - if (ret) { - ehca_err(ibqp->device, "prepare_sqe_rts() failed " - "ehca_qp=%p qp_num=%x ret=%i", - my_qp, ibqp->qp_num, ret); - goto modify_qp_exit2; - } - } - - /* - * enable RDMA_Atomic_Control if reset->init und reliable con - * this is necessary since gen2 does not provide that flag, - * but pHyp requires it - */ - if (statetrans == IB_QPST_RESET2INIT && - (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { - mqpcb->rdma_atomic_ctrl = 3; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); - } - /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ - if (statetrans == IB_QPST_INIT2RTR && - (ibqp->qp_type == IB_QPT_UC) && - !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { - mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - - if (attr_mask & IB_QP_PKEY_INDEX) { - if (attr->pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid pkey_index=%x. " - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->prim_p_key_idx = attr->pkey_index; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); - } - if (attr_mask & IB_QP_PORT) { - struct ehca_sport *sport; - struct ehca_qp *aqp1; - if (attr->port_num < 1 || attr->port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - sport = &shca->sport[attr->port_num - 1]; - if (!sport->ibqp_sqp[IB_QPT_GSI]) { - /* should not occur */ - ret = -EFAULT; - ehca_err(ibqp->device, "AQP1 was not created for " - "port=%x", attr->port_num); - goto modify_qp_exit2; - } - aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], - struct ehca_qp, ib_qp); - if (ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_SMI && - aqp1->mod_qp_parm) { - /* - * firmware will reject this modify_qp() because - * port is not activated/initialized fully - */ - ret = -EFAULT; - ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " - "either port is being activated (try again) " - "or cabling issue", attr->port_num); - goto modify_qp_exit2; - } - mqpcb->prim_phys_port = attr->port_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); - } - if (attr_mask & IB_QP_QKEY) { - mqpcb->qkey = attr->qkey; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); - } - if (attr_mask & IB_QP_AV) { - mqpcb->dlid = attr->ah_attr.dlid; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); - mqpcb->source_path_bits = attr->ah_attr.src_path_bits; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); - mqpcb->service_level = attr->ah_attr.sl; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - - if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, - attr->ah_attr.static_rate, - &mqpcb->max_static_rate)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! - */ - if (attr->ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag = 1; - - mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid.byte[cnt] = - attr->ah_attr.grh.dgid.raw[cnt]; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); - mqpcb->flow_label = attr->ah_attr.grh.flow_label; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); - mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); - mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - /* store ld(MTU) */ - my_qp->mtu_shift = attr->path_mtu + 7; - mqpcb->path_mtu = attr->path_mtu; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); - } - if (attr_mask & IB_QP_TIMEOUT) { - mqpcb->timeout = attr->timeout; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); - } - if (attr_mask & IB_QP_RETRY_CNT) { - mqpcb->retry_count = attr->retry_cnt; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RNR_RETRY) { - mqpcb->rnr_retry_count = attr->rnr_retry; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RQ_PSN) { - mqpcb->receive_psn = attr->rq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); - } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? - attr->max_dest_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? - attr->max_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET - (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); - } - if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_port_num < 1 - || attr->alt_port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->alt_port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - mqpcb->alt_phys_port = attr->alt_port_num; - - if (attr->alt_pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. " - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->alt_p_key_idx = attr->alt_pkey_index; - - mqpcb->timeout_al = attr->alt_timeout; - mqpcb->dlid_al = attr->alt_ah_attr.dlid; - mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; - mqpcb->service_level_al = attr->alt_ah_attr.sl; - - if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, - attr->alt_ah_attr.static_rate, - &mqpcb->max_static_rate_al)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - - /* OpenIB doesn't support alternate retry counts - copy them */ - mqpcb->retry_count_al = mqpcb->retry_count; - mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) - | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) - | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! - */ - if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag_al = 1; - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid_al.byte[cnt] = - attr->alt_ah_attr.grh.dgid.raw[cnt]; - mqpcb->source_gid_idx_al = - attr->alt_ah_attr.grh.sgid_index; - mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; - mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; - mqpcb->traffic_class_al = - attr->alt_ah_attr.grh.traffic_class; - - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); - } - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) { - mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); - } - - if (attr_mask & IB_QP_SQ_PSN) { - mqpcb->send_psn = attr->sq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); - } - - if (attr_mask & IB_QP_DEST_QPN) { - mqpcb->dest_qp_nr = attr->dest_qp_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state != IB_MIG_REARM - && attr->path_mig_state != IB_MIG_MIGRATED) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid mig_state=%x", - attr->path_mig_state); - goto modify_qp_exit2; - } - mqpcb->path_migration_state = attr->path_mig_state + 1; - if (attr->path_mig_state == IB_MIG_REARM) - my_qp->mig_armed = 1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); - } - - if (attr_mask & IB_QP_CAP) { - mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); - mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); - /* no support for max_send/recv_sge yet */ - } - - if (ehca_debug_level >= 2) - ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* doorbell to reprocessing wqes */ - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, bad_wqe_cnt-1); - ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); - } - - if (statetrans == IB_QPST_RESET2INIT || - statetrans == IB_QPST_INIT2INIT) { - mqpcb->qp_enable = 1; - mqpcb->qp_state = EHCA_QPS_INIT; - update_mask = 0; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "ENABLE in context of " - "RESET_2_INIT failed! Maybe you didn't get " - "a LID h_ret=%lli ehca_qp=%p qp_num=%x", - h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) - && !is_user) { - ret = check_for_left_cqes(my_qp, shca); - if (ret) - goto modify_qp_exit2; - } - - if (statetrans == IB_QPST_ANY2RESET) { - ipz_qeit_reset(&my_qp->ipz_rqueue); - ipz_qeit_reset(&my_qp->ipz_squeue); - - if (qp_cur_state == IB_QPS_ERR && !is_user) { - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - if (HAS_RQ(my_qp)) - del_from_err_list(my_qp->recv_cq, - &my_qp->rq_err_node); - } - if (!is_user) - reset_queue_map(&my_qp->sq_map); - - if (HAS_RQ(my_qp) && !is_user) - reset_queue_map(&my_qp->rq_map); - } - - if (attr_mask & IB_QP_QKEY) - my_qp->qkey = attr->qkey; - -modify_qp_exit2: - if (squeue_locked) { /* this means: sqe -> rts */ - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - my_qp->sqerr_purgeflag = 1; - } - -modify_qp_exit1: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata) -{ - int ret = 0; - - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - - /* The if-block below caches qp_attr to be modified for GSI and SMI - * qps during the initialization by ib_mad. When the respective port - * is activated, ie we got an event PORT_ACTIVE, we'll replay the - * cached modify calls sequence, see ehca_recover_sqs() below. - * Why that is required: - * 1) If one port is connected, older code requires that port one - * to be connected and module option nr_ports=1 to be given by - * user, which is very inconvenient for end user. - * 2) Firmware accepts modify_qp() only if respective port has become - * active. Older code had a wait loop of 30sec create_qp()/ - * define_aqp1(), which is not appropriate in practice. This - * code now removes that wait loop, see define_aqp1(), and always - * reports all ports to ib_mad resp. users. Only activated ports - * will then usable for the users. - */ - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { - int port = my_qp->init_attr.port_num; - struct ehca_sport *sport = &shca->sport[port - 1]; - unsigned long flags; - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - /* cache qp_attr only during init */ - if (my_qp->mod_qp_parm) { - struct ehca_mod_qp_parm *p; - if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { - ehca_err(&shca->ib_device, - "mod_qp_parm overflow state=%x port=%x" - " type=%x", attr->qp_state, - my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, - flags); - return -EINVAL; - } - p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; - p->mask = attr_mask; - p->attr = *attr; - my_qp->mod_qp_parm_idx++; - ehca_dbg(&shca->ib_device, - "Saved qp_attr for state=%x port=%x type=%x", - attr->qp_state, my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - goto out; - } - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - ret = internal_modify_qp(ibqp, attr, attr_mask, 0); - -out: - if ((ret == 0) && (attr_mask & IB_QP_STATE)) - my_qp->state = attr->qp_state; - - return ret; -} - -void ehca_recover_sqp(struct ib_qp *sqp) -{ - struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); - int port = my_sqp->init_attr.port_num; - struct ib_qp_attr attr; - struct ehca_mod_qp_parm *qp_parm; - int i, qp_parm_idx, ret; - unsigned long flags, wr_cnt; - - if (!my_sqp->mod_qp_parm) - return; - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); - - qp_parm = my_sqp->mod_qp_parm; - qp_parm_idx = my_sqp->mod_qp_parm_idx; - for (i = 0; i < qp_parm_idx; i++) { - attr = qp_parm[i].attr; - ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); - if (ret) { - ehca_err(sqp->device, "Could not modify SQP port=%x " - "qp_num=%x ret=%x", port, sqp->qp_num, ret); - goto free_qp_parm; - } - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", - port, sqp->qp_num, attr.qp_state); - } - - /* re-trigger posted recv wrs */ - wr_cnt = my_sqp->ipz_rqueue.current_q_offset / - my_sqp->ipz_rqueue.qe_size; - if (wr_cnt) { - spin_lock_irqsave(&my_sqp->spinlock_r, flags); - hipz_update_rqa(my_sqp, wr_cnt); - spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); - ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", - port, sqp->qp_num, wr_cnt); - } - -free_qp_parm: - kfree(qp_parm); - /* this prevents subsequent calls to modify_qp() to cache qp_attr */ - my_sqp->mod_qp_parm = NULL; -} - -int ehca_query_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int cnt, ret = 0; - u64 h_ret; - - if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device, "Invalid attribute mask " - "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", - my_qp, qp->qp_num, qp_attr_mask); - return -EINVAL; - } - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(qp->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp->qp_num, h_ret); - goto query_qp_exit1; - } - - qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); - qp_attr->qp_state = qp_attr->cur_qp_state; - - if (qp_attr->cur_qp_state == -EINVAL) { - ret = -EINVAL; - ehca_err(qp->device, "Got invalid ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - qpcb->qp_state, my_qp, qp->qp_num); - goto query_qp_exit1; - } - - if (qp_attr->qp_state == IB_QPS_SQD) - qp_attr->sq_draining = 1; - - qp_attr->qkey = qpcb->qkey; - qp_attr->path_mtu = qpcb->path_mtu; - qp_attr->path_mig_state = qpcb->path_migration_state - 1; - qp_attr->rq_psn = qpcb->receive_psn; - qp_attr->sq_psn = qpcb->send_psn; - qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; - qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; - qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; - /* UD_AV CIRCUMVENTION */ - if (my_qp->qp_type == IB_QPT_UD) { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe - 2; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe - 2; - } else { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe; - } - - qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; - qp_attr->dest_qp_num = qpcb->dest_qp_nr; - - qp_attr->pkey_index = qpcb->prim_p_key_idx; - qp_attr->port_num = qpcb->prim_phys_port; - qp_attr->timeout = qpcb->timeout; - qp_attr->retry_cnt = qpcb->retry_count; - qp_attr->rnr_retry = qpcb->rnr_retry_count; - - qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; - qp_attr->alt_port_num = qpcb->alt_phys_port; - qp_attr->alt_timeout = qpcb->timeout_al; - - qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; - qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; - - /* primary av */ - qp_attr->ah_attr.sl = qpcb->service_level; - - if (qpcb->send_grh_flag) { - qp_attr->ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->ah_attr.static_rate = qpcb->max_static_rate; - qp_attr->ah_attr.dlid = qpcb->dlid; - qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; - qp_attr->ah_attr.port_num = qp_attr->port_num; - - /* primary GRH */ - qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; - qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; - qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; - qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid.byte[cnt]; - - /* alternate AV */ - qp_attr->alt_ah_attr.sl = qpcb->service_level_al; - if (qpcb->send_grh_flag_al) { - qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; - qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; - qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; - - /* alternate GRH */ - qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; - qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; - qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; - qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid_al.byte[cnt]; - - /* return init attributes given in ehca_create_qp */ - if (qp_init_attr) - *qp_init_attr = my_qp->init_attr; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); - -query_qp_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) -{ - struct ehca_qp *my_qp = - container_of(ibsrq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = - container_of(ibsrq->pd->device, struct ehca_shca, ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 update_mask; - u64 h_ret; - int ret = 0; - - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - update_mask = 0; - if (attr_mask & IB_SRQ_LIMIT) { - attr_mask &= ~IB_SRQ_LIMIT; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) - | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = attr->srq_limit; - mqpcb->qp_aff_asyn_ev_log_reg = - EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); - } - - /* by now, all bits in attr_mask should have been cleared */ - if (attr_mask) { - ehca_err(ibsrq->device, "invalid attribute mask bits set " - "attr_mask=%x", attr_mask); - ret = -EINVAL; - goto modify_srq_exit0; - } - - if (ehca_debug_level >= 2) - ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, - NULL, update_mask, mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", - h_ret, my_qp, my_qp->real_qp_num); - } - -modify_srq_exit0: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) -{ - struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int ret = 0; - u64 h_ret; - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(srq->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, - NULL, qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(srq->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, h_ret); - goto query_srq_exit1; - } - - srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->max_sge = 3; - srq_attr->srq_limit = qpcb->curr_srq_limit; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - -query_srq_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject) -{ - struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); - struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, - ib_pd); - struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; - u32 qp_num = my_qp->real_qp_num; - int ret; - u64 h_ret; - u8 port_num; - int is_user = 0; - enum ib_qp_type qp_type; - unsigned long flags; - - if (uobject) { - is_user = 1; - if (my_qp->mm_count_galpa || - my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(dev, "Resources still referenced in " - "user space qp_num=%x", qp_num); - return -EINVAL; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); - if (ret) { - ehca_err(dev, "Couldn't unassign qp from " - "send_cq ret=%i qp_num=%x cq_num=%x", ret, - qp_num, my_qp->send_cq->cq_number); - return ret; - } - } - - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - - /* - * SRQs will never get into an error list and do not have a recv_cq, - * so we need to skip them here. - */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) - del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - - if (HAS_SQ(my_qp) && !is_user) - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - /* now wait until all pending events have completed */ - wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); - - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - if (h_ret != H_SUCCESS) { - ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); - return ehca2ib_return_code(h_ret); - } - - port_num = my_qp->init_attr.port_num; - qp_type = my_qp->init_attr.qp_type; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - /* no support for IB_QPT_SMI yet */ - if (qp_type == IB_QPT_GSI) { - struct ib_event event; - ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port_num; - shca->sport[port_num - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - } - - if (HAS_RQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (!is_user) - vfree(my_qp->rq_map.map); - } - if (HAS_SQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - if (!is_user) - vfree(my_qp->sq_map.map); - } - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return 0; -} - -int ehca_destroy_qp(struct ib_qp *qp) -{ - return internal_destroy_qp(qp->device, - container_of(qp, struct ehca_qp, ib_qp), - qp->uobject); -} - -int ehca_destroy_srq(struct ib_srq *srq) -{ - return internal_destroy_qp(srq->device, - container_of(srq, struct ehca_qp, ib_srq), - srq->uobject); -} - -int ehca_init_qp_cache(void) -{ - qp_cache = kmem_cache_create("ehca_cache_qp", - sizeof(struct ehca_qp), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!qp_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_qp_cache(void) -{ - if (qp_cache) - kmem_cache_destroy(qp_cache); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_reqs.c b/kernel/drivers/infiniband/hw/ehca/ehca_reqs.c deleted file mode 100644 index 47f949843..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_reqs.c +++ /dev/null @@ -1,953 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * post_send/recv, poll_cq, req_notify - * - * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * Joachim Fenkes <fenkes@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -/* in RC traffic, insert an empty RDMA READ every this many packets */ -#define ACK_CIRC_THRESHOLD 2000000 - -static u64 replace_wr_id(u64 wr_id, u16 idx) -{ - u64 ret; - - ret = wr_id & ~QMAP_IDX_MASK; - ret |= idx & QMAP_IDX_MASK; - - return ret; -} - -static u16 get_app_wr_id(u64 wr_id) -{ - return wr_id & QMAP_IDX_MASK; -} - -static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, - struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr, - u32 rq_map_idx) -{ - u8 cnt_ds; - if (unlikely((recv_wr->num_sge < 0) || - (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. " - "num_sqe=%x max_nr_of_sg=%x", - recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); - wqe_p->nr_of_data_seg = recv_wr->num_sge; - - for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { - wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = - recv_wr->sg_list[cnt_ds].addr; - wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = - recv_wr->sg_list[cnt_ds].lkey; - wqe_p->u.all_rcv.sg_list[cnt_ds].length = - recv_wr->sg_list[cnt_ds].length; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", - ipz_rqueue); - ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); - } - - return 0; -} - -#if defined(DEBUG_GSI_SEND_WR) - -/* need ib_mad struct */ -#include <rdma/ib_mad.h> - -static void trace_send_wr_ud(const struct ib_send_wr *send_wr) -{ - int idx; - int j; - while (send_wr) { - struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; - struct ib_sge *sge = send_wr->sg_list; - ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x", idx, send_wr->wr_id, - send_wr->num_sge, send_wr->send_flags, - send_wr->opcode); - if (mad_hdr) { - ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " - "mgmt_class=%x class_version=%x method=%x " - "status=%x class_specific=%x tid=%lx " - "attr_id=%x resv=%x attr_mod=%x", - idx, mad_hdr->base_version, - mad_hdr->mgmt_class, - mad_hdr->class_version, mad_hdr->method, - mad_hdr->status, mad_hdr->class_specific, - mad_hdr->tid, mad_hdr->attr_id, - mad_hdr->resv, - mad_hdr->attr_mod); - } - for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = __va(sge->addr); - ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " - "lkey=%x", - idx, j, data, sge->length, sge->lkey); - /* assume length is n*16 */ - ehca_dmp(data, sge->length, "send_wr#%x sge#%x", - idx, j); - sge++; - } /* eof for j */ - idx++; - send_wr = send_wr->next; - } /* eof while send_wr */ -} - -#endif /* DEBUG_GSI_SEND_WR */ - -static inline int ehca_write_swqe(struct ehca_qp *qp, - struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr, - u32 sq_map_idx, - int hidden) -{ - u32 idx; - u64 dma_length; - struct ehca_av *my_av; - u32 remote_qkey = send_wr->wr.ud.remote_qkey; - struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; - - if (unlikely((send_wr->num_sge < 0) || - (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. " - "num_sqe=%x max_nr_of_sg=%x", - send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); - - qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 0; - - switch (send_wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_SEND; - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_RDMAWRITE; - break; - case IB_WR_RDMA_READ: - wqe_p->optype = WQE_OPTYPE_RDMAREAD; - break; - default: - ehca_gen_err("Invalid opcode=%x", send_wr->opcode); - return -EINVAL; /* invalid opcode */ - } - - wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; - - wqe_p->wr_flag = 0; - - if ((send_wr->send_flags & IB_SEND_SIGNALED || - qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) - && !hidden) { - wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; - qmap_entry->cqe_req = 1; - } - - if (send_wr->opcode == IB_WR_SEND_WITH_IMM || - send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - /* this might not work as long as HW does not support it */ - wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); - wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; - } - - wqe_p->nr_of_data_seg = send_wr->num_sge; - - switch (qp->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - /* no break is intential here */ - case IB_QPT_UD: - /* IB 1.2 spec C10-15 compliance */ - if (send_wr->wr.ud.remote_qkey & 0x80000000) - remote_qkey = qp->qkey; - - wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; - wqe_p->local_ee_context_qkey = remote_qkey; - if (unlikely(!send_wr->wr.ud.ah)) { - ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); - return -EINVAL; - } - if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { - ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); - return -EINVAL; - } - my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); - wqe_p->u.ud_av.ud_av = my_av->av; - - /* - * omitted check of IB_SEND_INLINE - * since HW does not support it - */ - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.ud_av.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.ud_av.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.ud_av.sg_list[idx].length = - send_wr->sg_list[idx].length; - } /* eof for idx */ - if (qp->qp_type == IB_QPT_SMI || - qp->qp_type == IB_QPT_GSI) - wqe_p->u.ud_av.ud_av.pmtu = 1; - if (qp->qp_type == IB_QPT_GSI) { - wqe_p->pkeyi = send_wr->wr.ud.pkey_index; -#ifdef DEBUG_GSI_SEND_WR - trace_send_wr_ud(send_wr); -#endif /* DEBUG_GSI_SEND_WR */ - } - break; - - case IB_QPT_UC: - if (send_wr->send_flags & IB_SEND_FENCE) - wqe_p->wr_flag |= WQE_WRFLAG_FENCE; - /* no break is intentional here */ - case IB_QPT_RC: - /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_address = - send_wr->wr.rdma.remote_addr; - wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; - - /* - * omitted checking of IB_SEND_INLINE - * since HW does not support it - */ - dma_length = 0; - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.nud.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.nud.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.nud.sg_list[idx].length = - send_wr->sg_list[idx].length; - dma_length += send_wr->sg_list[idx].length; - } /* eof idx */ - wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; - - /* unsolicited ack circumvention */ - if (send_wr->opcode == IB_WR_RDMA_READ) { - /* on RDMA read, switch on and reset counters */ - qp->message_count = qp->packet_count = 0; - qp->unsol_ack_circ = 1; - } else - /* else estimate #packets */ - qp->packet_count += (dma_length >> qp->mtu_shift) + 1; - - break; - - default: - ehca_gen_err("Invalid qptype=%x", qp->qp_type); - return -EINVAL; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); - } - return 0; -} - -/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ -static inline void map_ib_wc_status(u32 cqe_status, - enum ib_wc_status *wc_status) -{ - if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { - switch (cqe_status & 0x3F) { - case 0x01: - case 0x21: - *wc_status = IB_WC_LOC_LEN_ERR; - break; - case 0x02: - case 0x22: - *wc_status = IB_WC_LOC_QP_OP_ERR; - break; - case 0x03: - case 0x23: - *wc_status = IB_WC_LOC_EEC_OP_ERR; - break; - case 0x04: - case 0x24: - *wc_status = IB_WC_LOC_PROT_ERR; - break; - case 0x05: - case 0x25: - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - case 0x06: - *wc_status = IB_WC_MW_BIND_ERR; - break; - case 0x07: /* remote error - look into bits 20:24 */ - switch ((cqe_status - & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { - case 0x0: - /* - * PSN Sequence Error! - * couldn't find a matching status! - */ - *wc_status = IB_WC_GENERAL_ERR; - break; - case 0x1: - *wc_status = IB_WC_REM_INV_REQ_ERR; - break; - case 0x2: - *wc_status = IB_WC_REM_ACCESS_ERR; - break; - case 0x3: - *wc_status = IB_WC_REM_OP_ERR; - break; - case 0x4: - *wc_status = IB_WC_REM_INV_RD_REQ_ERR; - break; - } - break; - case 0x08: - *wc_status = IB_WC_RETRY_EXC_ERR; - break; - case 0x09: - *wc_status = IB_WC_RNR_RETRY_EXC_ERR; - break; - case 0x0A: - case 0x2D: - *wc_status = IB_WC_REM_ABORT_ERR; - break; - case 0x0B: - case 0x2E: - *wc_status = IB_WC_INV_EECN_ERR; - break; - case 0x0C: - case 0x2F: - *wc_status = IB_WC_INV_EEC_STATE_ERR; - break; - case 0x0D: - *wc_status = IB_WC_BAD_RESP_ERR; - break; - case 0x10: - /* WQE purged */ - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - default: - *wc_status = IB_WC_FATAL_ERR; - - } - } else - *wc_status = IB_WC_SUCCESS; -} - -static inline int post_one_send(struct ehca_qp *my_qp, - struct ib_send_wr *cur_send_wr, - int hidden) -{ - struct ehca_wqe *wqe_p; - int ret; - u32 sq_map_idx; - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -ENOMEM; - } - - /* - * Get the index of the WQE in the send queue. The same index is used - * for writing into the sq_map. - */ - sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; - - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - ehca_err(my_qp->ib_qp.device, "Could not write WQE " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - int wqe_cnt = 0; - int ret = 0; - unsigned long flags; - - /* Reject WR if QP is in RESET, INIT or RTR state */ - if (unlikely(my_qp->state < IB_QPS_RTS)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - ret = -EINVAL; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - - /* Send an empty extra RDMA read if: - * 1) there has been an RDMA read on this connection before - * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets - * 3) we can be sure that any previous extra RDMA read has been - * processed so we don't overflow the SQ - */ - if (unlikely(my_qp->unsol_ack_circ && - my_qp->packet_count > ACK_CIRC_THRESHOLD && - my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { - /* insert an empty RDMA READ to fix up the remote QP state */ - struct ib_send_wr circ_wr; - memset(&circ_wr, 0, sizeof(circ_wr)); - circ_wr.opcode = IB_WR_RDMA_READ; - post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ - wqe_cnt++; - ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); - my_qp->message_count = my_qp->packet_count = 0; - } - - /* loop processes list of send reqs */ - while (send_wr) { - ret = post_one_send(my_qp, send_wr, 0); - if (unlikely(ret)) { - goto post_send_exit0; - } - wqe_cnt++; - send_wr = send_wr->next; - } - -post_send_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, qp->qp_num, wqe_cnt, ret); - my_qp->message_count += wqe_cnt; - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - -out: - if (ret) - *bad_send_wr = send_wr; - return ret; -} - -static int internal_post_recv(struct ehca_qp *my_qp, - struct ib_device *dev, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_wqe *wqe_p; - int wqe_cnt = 0; - int ret = 0; - u32 rq_map_idx; - unsigned long flags; - struct ehca_qmap_entry *qmap_entry; - - if (unlikely(!HAS_RQ(my_qp))) { - ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", - my_qp, my_qp->real_qp_num, my_qp->ext_type); - ret = -ENODEV; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, flags); - - /* loop processes list of recv reqs */ - while (recv_wr) { - u64 start_offset = my_qp->ipz_rqueue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - /* - * Get the index of the WQE in the recv queue. The same index - * is used for writing into the rq_map. - */ - rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; - - /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, - rq_map_idx); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_rqueue.current_q_offset = start_offset; - ret = -EINVAL; - ehca_err(dev, "Could not write WQE " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - - qmap_entry = &my_qp->rq_map.map[rq_map_idx]; - qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 1; - - wqe_cnt++; - recv_wr = recv_wr->next; - } /* eof for recv_wr */ - -post_recv_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_rqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, my_qp->real_qp_num, wqe_cnt, ret); - spin_unlock_irqrestore(&my_qp->spinlock_r, flags); - -out: - if (ret) - *bad_recv_wr = recv_wr; - - return ret; -} - -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - - /* Reject WR if QP is in RESET state */ - if (unlikely(my_qp->state == IB_QPS_RESET)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - *bad_recv_wr = recv_wr; - return -EINVAL; - } - - return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); -} - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), - srq->device, recv_wr, bad_recv_wr); -} - -/* - * ib_wc_opcode table converts ehca wc opcode to ib - * Since we use zero to indicate invalid opcode, the actual ib opcode must - * be decremented!!! - */ -static const u8 ib_wc_opcode[255] = { - [0x01] = IB_WC_RECV+1, - [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, - [0x04] = IB_WC_BIND_MW+1, - [0x08] = IB_WC_FETCH_ADD+1, - [0x10] = IB_WC_COMP_SWAP+1, - [0x20] = IB_WC_RDMA_WRITE+1, - [0x40] = IB_WC_RDMA_READ+1, - [0x80] = IB_WC_SEND+1 -}; - -/* internal function to poll one entry of cq */ -static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) -{ - int ret = 0, qmap_tail_idx; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - struct ehca_cqe *cqe; - struct ehca_qp *my_qp; - struct ehca_qmap_entry *qmap_entry; - struct ehca_queue_map *qmap; - int cqe_count = 0, is_error; - -repoll: - cqe = (struct ehca_cqe *) - ipz_qeit_get_inc_valid(&my_cq->ipz_queue); - if (!cqe) { - ret = -EAGAIN; - if (ehca_debug_level >= 3) - ehca_dbg(cq->device, "Completion queue is empty " - "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); - goto poll_cq_one_exit0; - } - - /* prevents loads being reordered across this point */ - rmb(); - - cqe_count++; - if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp; - int purgeflag; - unsigned long flags; - - qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); - if (!qp) { - ehca_err(cq->device, "cq_num=%x qp_num=%x " - "could not find qp -> ignore cqe", - my_cq->cq_number, cqe->local_qp_number); - ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", - my_cq->cq_number, cqe->local_qp_number); - /* ignore this purged cqe */ - goto repoll; - } - spin_lock_irqsave(&qp->spinlock_s, flags); - purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, flags); - - if (purgeflag) { - ehca_dbg(cq->device, - "Got CQE with purged bit qp_num=%x src_qp=%x", - cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level >= 2) - ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", - cqe->local_qp_number, - cqe->remote_qp_number); - /* - * ignore this to avoid double cqes of bad wqe - * that caused sqe and turn off purge flag - */ - qp->sqerr_purgeflag = 0; - goto repoll; - } - } - - is_error = cqe->status & WC_STATUS_ERROR_BIT; - - /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ - if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { - ehca_dbg(cq->device, - "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", - is_error ? "ERROR " : "", my_cq, my_cq->cq_number); - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x -------------------------", - my_cq, my_cq->cq_number); - } - - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - read_unlock(&ehca_qp_idr_lock); - if (!my_qp) - goto repoll; - wc->qp = &my_qp->ib_qp; - - qmap_tail_idx = get_app_wr_id(cqe->work_request_id); - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) - /* We got a send completion. */ - qmap = &my_qp->sq_map; - else - /* We got a receive completion. */ - qmap = &my_qp->rq_map; - - /* advance the tail pointer */ - qmap->tail = qmap_tail_idx; - - if (is_error) { - /* - * set left_to_poll to 0 because in error state, we will not - * get any additional CQEs - */ - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - my_qp->sq_map.left_to_poll = 0; - ehca_add_to_err_list(my_qp, 1); - - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - my_qp->rq_map.left_to_poll = 0; - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - - qmap_entry = &qmap->map[qmap_tail_idx]; - if (qmap_entry->reported) { - ehca_warn(cq->device, "Double cqe on qp_num=%#x", - my_qp->real_qp_num); - /* found a double cqe, discard it and read next one */ - goto repoll; - } - - wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); - qmap_entry->reported = 1; - - /* if left_to_poll is decremented to 0, add the QP to the error list */ - if (qmap->left_to_poll > 0) { - qmap->left_to_poll--; - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - ehca_add_to_err_list(my_qp, 1); - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - } - - /* eval ib_wc_opcode */ - wc->opcode = ib_wc_opcode[cqe->optype]-1; - if (unlikely(wc->opcode == -1)) { - ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " - "ehca_cq=%p cq_num=%x", - cqe->optype, cqe->status, my_cq, my_cq->cq_number); - /* dump cqe for other infos */ - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - /* update also queue adder to throw away this entry!!! */ - goto repoll; - } - - /* eval ib_wc_status */ - if (unlikely(is_error)) { - /* complete with errors */ - map_ib_wc_status(cqe->status, &wc->status); - wc->vendor_err = wc->status; - } else - wc->status = IB_WC_SUCCESS; - - wc->byte_len = cqe->nr_bytes_transferred; - wc->pkey_index = cqe->pkey_index; - wc->slid = cqe->rlid; - wc->dlid_path_bits = cqe->dlid; - wc->src_qp = cqe->remote_qp_number; - /* - * HW has "Immed data present" and "GRH present" in bits 6 and 5. - * SW defines those in bits 1 and 0, so we can just shift and mask. - */ - wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; - wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); - wc->sl = cqe->service_level; - -poll_cq_one_exit0: - if (cqe_count > 0) - hipz_update_feca(my_cq, cqe_count); - - return ret; -} - -static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, - struct ib_wc *wc, int num_entries, - struct ipz_queue *ipz_queue, int on_sq) -{ - int nr = 0; - struct ehca_wqe *wqe; - u64 offset; - struct ehca_queue_map *qmap; - struct ehca_qmap_entry *qmap_entry; - - if (on_sq) - qmap = &my_qp->sq_map; - else - qmap = &my_qp->rq_map; - - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - while ((nr < num_entries) && (qmap_entry->reported == 0)) { - /* generate flush CQE */ - - memset(wc, 0, sizeof(*wc)); - - offset = qmap->next_wqe_idx * ipz_queue->qe_size; - wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); - if (!wqe) { - ehca_err(cq->device, "Invalid wqe offset=%#llx on " - "qp_num=%#x", offset, my_qp->real_qp_num); - return nr; - } - - wc->wr_id = replace_wr_id(wqe->work_request_id, - qmap_entry->app_wr_id); - - if (on_sq) { - switch (wqe->optype) { - case WQE_OPTYPE_SEND: - wc->opcode = IB_WC_SEND; - break; - case WQE_OPTYPE_RDMAWRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case WQE_OPTYPE_RDMAREAD: - wc->opcode = IB_WC_RDMA_READ; - break; - default: - ehca_err(cq->device, "Invalid optype=%x", - wqe->optype); - return nr; - } - } else - wc->opcode = IB_WC_RECV; - - if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { - wc->ex.imm_data = wqe->immediate_data; - wc->wc_flags |= IB_WC_WITH_IMM; - } - - wc->status = IB_WC_WR_FLUSH_ERR; - - wc->qp = &my_qp->ib_qp; - - /* mark as reported and advance next_wqe pointer */ - qmap_entry->reported = 1; - qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, - qmap->entries); - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - wc++; nr++; - } - - return nr; - -} - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) -{ - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int nr; - struct ehca_qp *err_qp; - struct ib_wc *current_wc = wc; - int ret = 0; - unsigned long flags; - int entries_left = num_entries; - - if (num_entries < 1) { - ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " - "cq_num=%x", num_entries, my_cq, my_cq->cq_number); - ret = -EINVAL; - goto poll_cq_exit0; - } - - spin_lock_irqsave(&my_cq->spinlock, flags); - - /* generate flush cqes for send queues */ - list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_squeue, 1); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - /* generate flush cqes for receive queues */ - list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_rqueue, 0); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - for (nr = 0; nr < entries_left; nr++) { - ret = ehca_poll_cq_one(cq, current_wc); - if (ret) - break; - current_wc++; - } /* eof for nr */ - entries_left -= nr; - - spin_unlock_irqrestore(&my_cq->spinlock, flags); - if (ret == -EAGAIN || !ret) - ret = num_entries - entries_left; - -poll_cq_exit0: - return ret; -} - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) -{ - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int ret = 0; - - switch (notify_flags & IB_CQ_SOLICITED_MASK) { - case IB_CQ_SOLICITED: - hipz_set_cqx_n0(my_cq, 1); - break; - case IB_CQ_NEXT_COMP: - hipz_set_cqx_n1(my_cq, 1); - break; - default: - return -EINVAL; - } - - if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { - unsigned long spl_flags; - spin_lock_irqsave(&my_cq->spinlock, spl_flags); - ret = ipz_qeit_is_valid(&my_cq->ipz_queue); - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); - } - - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_sqp.c b/kernel/drivers/infiniband/hw/ehca/ehca_sqp.c deleted file mode 100644 index dba8f9f8b..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_sqp.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * SQP functions - * - * Authors: Khadija Souissi <souissi@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <rdma/ib_mad.h> - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) -#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) - -#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) - -/** - * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue - * pair is created successfully, the corresponding port gets active. - * - * Define Special Queue pair 0 (SMI QP) is still not supported. - * - * @qp_init_attr: Queue pair init attributes with port and queue pair type - */ - -u64 ehca_define_sqp(struct ehca_shca *shca, - struct ehca_qp *ehca_qp, - struct ib_qp_init_attr *qp_init_attr) -{ - u32 pma_qp_nr, bma_qp_nr; - u64 ret; - u8 port = qp_init_attr->port_num; - int counter; - - shca->sport[port - 1].port_state = IB_PORT_DOWN; - - switch (qp_init_attr->qp_type) { - case IB_QPT_SMI: - /* function not supported yet */ - break; - case IB_QPT_GSI: - ret = hipz_h_define_aqp1(shca->ipz_hca_handle, - ehca_qp->ipz_qp_handle, - ehca_qp->galpas.kernel, - (u32) qp_init_attr->port_num, - &pma_qp_nr, &bma_qp_nr); - - if (ret != H_SUCCESS) { - ehca_err(&shca->ib_device, - "Can't define AQP1 for port %x. h_ret=%lli", - port, ret); - return ret; - } - shca->sport[port - 1].pma_qp_nr = pma_qp_nr; - ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", - port, pma_qp_nr); - break; - default: - ehca_err(&shca->ib_device, "invalid qp_type=%x", - qp_init_attr->qp_type); - return H_PARAMETER; - } - - if (ehca_nr_ports < 0) /* autodetect mode */ - return H_SUCCESS; - - for (counter = 0; - shca->sport[port - 1].port_state != IB_PORT_ACTIVE && - counter < ehca_port_act_time; - counter++) { - ehca_dbg(&shca->ib_device, "... wait until port %x is active", - port); - msleep_interruptible(1000); - } - - if (counter == ehca_port_act_time) { - ehca_err(&shca->ib_device, "Port %x is not active.", port); - return H_HARDWARE; - } - - return H_SUCCESS; -} - -struct ib_perf { - struct ib_mad_hdr mad_hdr; - u8 reserved[40]; - u8 data[192]; -} __attribute__ ((packed)); - -/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ -struct tcslfl { - u32 tc:8; - u32 sl:4; - u32 fl:20; -} __attribute__ ((packed)); - -/* IP Version/TC/FL packed into 32 bits, as in GRH */ -struct vertcfl { - u32 ver:4; - u32 tc:8; - u32 fl:20; -} __attribute__ ((packed)); - -static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - struct ib_perf *in_perf = (struct ib_perf *)in_mad; - struct ib_perf *out_perf = (struct ib_perf *)out_mad; - struct ib_class_port_info *poi = - (struct ib_class_port_info *)out_perf->data; - struct tcslfl *tcslfl = - (struct tcslfl *)&poi->redirect_tcslfl; - struct ehca_shca *shca = - container_of(ibdev, struct ehca_shca, ib_device); - struct ehca_sport *sport = &shca->sport[port_num - 1]; - - ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); - - *out_mad = *in_mad; - - if (in_perf->mad_hdr.class_version != 1) { - ehca_warn(ibdev, "Unsupported class_version=%x", - in_perf->mad_hdr.class_version); - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; - goto perf_reply; - } - - switch (in_perf->mad_hdr.method) { - case IB_MGMT_METHOD_GET: - case IB_MGMT_METHOD_SET: - /* set class port info for redirection */ - out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; - out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; - memset(poi, 0, sizeof(*poi)); - poi->base_version = 1; - poi->class_version = 1; - poi->resp_time_value = 18; - - /* copy local routing information from WC where applicable */ - tcslfl->sl = in_wc->sl; - poi->redirect_lid = - sport->saved_attr.lid | in_wc->dlid_path_bits; - poi->redirect_qp = sport->pma_qp_nr; - poi->redirect_qkey = IB_QP1_QKEY; - - ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, - &poi->redirect_pkey); - - /* if request was globally routed, copy route info */ - if (in_grh) { - struct vertcfl *vertcfl = - (struct vertcfl *)&in_grh->version_tclass_flow; - memcpy(poi->redirect_gid, in_grh->dgid.raw, - sizeof(poi->redirect_gid)); - tcslfl->tc = vertcfl->tc; - tcslfl->fl = vertcfl->fl; - } else - /* else only fill in default GID */ - ehca_query_gid(ibdev, port_num, 0, - (union ib_gid *)&poi->redirect_gid); - - ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", - sport->saved_attr.lid, sport->pma_qp_nr); - break; - - case IB_MGMT_METHOD_GET_RESP: - return IB_MAD_RESULT_FAILURE; - - default: - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; - break; - } - -perf_reply: - out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - int ret; - - if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) - return IB_MAD_RESULT_FAILURE; - - /* accept only pma request */ - if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) - return IB_MAD_RESULT_SUCCESS; - - ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); - ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, - in_mad, out_mad); - - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_tools.h b/kernel/drivers/infiniband/hw/ehca/ehca_tools.h deleted file mode 100644 index d280b12aa..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_tools.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * auxiliary functions - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Khadija Souissi <souissik@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef EHCA_TOOLS_H -#define EHCA_TOOLS_H - -#include <linux/kernel.h> -#include <linux/spinlock.h> -#include <linux/delay.h> -#include <linux/idr.h> -#include <linux/kthread.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/vmalloc.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/device.h> - -#include <linux/atomic.h> -#include <asm/ibmebus.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/hvcall.h> - -extern int ehca_debug_level; - -#define ehca_dbg(ib_dev, format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ - "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, \ - ## arg); \ - } while (0) - -#define ehca_info(ib_dev, format, arg...) \ - dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_warn(ib_dev, format, arg...) \ - dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_err(ib_dev, format, arg...) \ - dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/* use this one only if no ib_dev available */ -#define ehca_gen_dbg(format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg); \ - } while (0) - -#define ehca_gen_warn(format, arg...) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_gen_err(format, arg...) \ - printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/** - * ehca_dmp - printk a memory block, whose length is n*8 bytes. - * Each line has the following layout: - * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> - */ -#define ehca_dmp(adr, len, format, args...) \ - do { \ - unsigned int x; \ - unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char *)(adr); \ - for (x = 0; x < l; x += 16) { \ - printk(KERN_INFO "EHCA_DMP:%s " format \ - " adr=%p ofs=%04x %016llx %016llx\n", \ - __func__, ##args, deb, x, \ - *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ - deb += 16; \ - } \ - } while (0) - -/* define a bitmask, little endian version */ -#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) - -/* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) - -/* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) - -/* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) - -/** - * EHCA_BMASK_SET - return value shifted and masked by mask - * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable - * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask - * in variable - */ -#define EHCA_BMASK_SET(mask, value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) - -/** - * EHCA_BMASK_GET - extract a parameter from value by mask - */ -#define EHCA_BMASK_GET(mask, value) \ - (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) - -/* Converts ehca to ib return code */ -int ehca2ib_return_code(u64 ehca_rc); - -#endif /* EHCA_TOOLS_H */ diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_uverbs.c b/kernel/drivers/infiniband/hw/ehca/ehca_uverbs.c deleted file mode 100644 index 1a1d5d99f..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * userspace support verbs - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> - -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata) -{ - struct ehca_ucontext *my_context; - - my_context = kzalloc(sizeof *my_context, GFP_KERNEL); - if (!my_context) { - ehca_err(device, "Out of memory device=%p", device); - return ERR_PTR(-ENOMEM); - } - - return &my_context->ib_ucontext; -} - -int ehca_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); - return 0; -} - -static void ehca_mm_open(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)++; - if (!(*count)) - ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static void ehca_mm_close(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)--; - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static const struct vm_operations_struct vm_ops = { - .open = ehca_mm_open, - .close = ehca_mm_close, -}; - -static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, - u32 *mm_count) -{ - int ret; - u64 vsize, physical; - - vsize = vma->vm_end - vma->vm_start; - if (vsize < EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = galpas->user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ - ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, - vma->vm_page_prot); - if (unlikely(ret)) { - ehca_gen_err("remap_pfn_range() failed ret=%i", ret); - return -ENOMEM; - } - - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, - u32 *mm_count) -{ - int ret; - u64 start, ofs; - struct page *page; - - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - start = vma->vm_start; - for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { - u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); - page = virt_to_page(virt_addr); - ret = vm_insert_page(vma, start, page); - if (unlikely(ret)) { - ehca_gen_err("vm_insert_page() failed rc=%i", ret); - return ret; - } - start += PAGE_SIZE; - } - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); - ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_fw() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); - ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_queue() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", - rsrc_type, cq->cq_number); - return -EINVAL; - } - - return 0; -} - -static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa fw handle */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); - ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return -ENOMEM; - } - break; - - case 1: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, - &qp->mm_count_rqueue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - case 2: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, - &qp->mm_count_squeue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", - rsrc_type, qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - u64 fileoffset = vma->vm_pgoff; - u32 idr_handle = fileoffset & 0x1FFFFFF; - u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */ - u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ - u32 ret; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ib_uobject *uobject; - - switch (q_type) { - case 0: /* CQ */ - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, idr_handle); - read_unlock(&ehca_cq_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!cq) - return -EINVAL; - - if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_cq(vma, cq, rsrc_type); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_cq() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* QP */ - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, idr_handle); - read_unlock(&ehca_qp_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!qp) - return -EINVAL; - - uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; - if (!uobject || uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_qp(vma, qp, rsrc_type); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_qp() failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_gen_err("bad queue type %x", q_type); - return -EINVAL; - } - - return 0; -} diff --git a/kernel/drivers/infiniband/hw/ehca/hcp_if.c b/kernel/drivers/infiniband/hw/ehca/hcp_if.c deleted file mode 100644 index 89517ffb4..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hcp_if.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Joachim Fenkes <fenkes@de.ibm.com> - * Gerd Bayer <gerd.bayer@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <asm/hvcall.h> -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hcp_phyp.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) -#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) -#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) -#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) -#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) -#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) -#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) -#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) -#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) -#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) -#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) - -#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) -#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) -#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) - -#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) -#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) -#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) - -#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) -#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) - -#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) - -#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) -#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) -#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) - -#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" -#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" -#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx" - -static DEFINE_SPINLOCK(hcall_lock); - -static long ehca_plpar_hcall_norets(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, - arg5, arg6, arg7); - - if (ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) - ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, - opcode, ret, arg1, arg2, arg3, - arg4, arg5, arg6, arg7); - else - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); - - return ret; - } - - return H_BUSY; -} - -static long ehca_plpar_hcall9(unsigned long opcode, - unsigned long *outs, /* array of 9 outputs */ - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long arg9) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall9(opcode, outs, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - if (ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) { - ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - } else if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - return ret; - } - - return H_BUSY; -} - -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 *act_nr_of_entries, - u32 *act_pages, - u32 *eq_ist) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - u64 allocate_controls; - - /* resource type */ - allocate_controls = 3ULL; - - /* ISN is associated */ - if (neq_control != 1) - allocate_controls = (1ULL << (63 - 7)) | allocate_controls; - else /* notification event queue */ - allocate_controls = (1ULL << 63) | allocate_controls; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - number_of_entries, /* r6 */ - 0, 0, 0, 0, 0, 0); - eq_handle->handle = outs[0]; - *act_nr_of_entries = (u32)outs[3]; - *act_pages = (u32)outs[4]; - *eq_ist = (u32)outs[5]; - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resource - ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask) -{ - return ehca_plpar_hcall_norets(H_RESET_EVENTS, - adapter_handle.handle, /* r4 */ - eq_handle.handle, /* r5 */ - event_mask, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param) -{ - int rc; - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 2, /* r5 */ - param->eq_handle.handle, /* r6 */ - cq->token, /* r7 */ - param->nr_cqe, /* r8 */ - 0, 0, 0, 0); - cq->ipz_cq_handle.handle = outs[0]; - param->act_nr_of_entries = (u32)outs[3]; - param->act_pages = (u32)outs[4]; - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[5]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user) -{ - int rc; - u64 ret; - u64 allocate_controls, max_r10_reg, r11, r12; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) - | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) - | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) - | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) - | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, - parms->squeue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, - parms->rqueue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_RECV_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_SEND_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, - parms->ud_av_l_key_ctl) - | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); - - max_r10_reg = - EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - parms->squeue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - parms->rqueue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, - parms->squeue.max_sge) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, - parms->rqueue.max_sge); - - r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); - - if (parms->ext_type == EQPT_SRQ) - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); - else - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - parms->send_cq_handle.handle, - parms->recv_cq_handle.handle, - parms->eq_handle.handle, - ((u64)parms->token << 32) | parms->pd.value, - max_r10_reg, r11, r12); - - parms->qp_handle.handle = outs[0]; - parms->real_qp_num = (u32)outs[1]; - parms->squeue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); - parms->rqueue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); - parms->squeue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); - parms->rqueue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); - parms->squeue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); - parms->rqueue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[6]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - parms->qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block) -{ - u64 ret; - u64 r_cb = __pa(query_port_response_block); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response block not page aligned"); - return H_PARAMETER; - } - - ret = ehca_plpar_hcall_norets(H_QUERY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - r_cb, /* r6 */ - 0, 0, 0, 0); - - if (ehca_debug_level >= 2) - ehca_dmp(query_port_response_block, 64, "response_block"); - - return ret; -} - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask) -{ - u64 port_attributes = port_cap; - - if (modify_mask & IB_PORT_SHUTDOWN) - port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); - if (modify_mask & IB_PORT_INIT_TYPE) - port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); - if (modify_mask & IB_PORT_RESET_QKEY_CNTR) - port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); - - return ehca_plpar_hcall_norets(H_MODIFY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - port_attributes, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock) -{ - u64 r_cb = __pa(query_hca_rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response_block=%p not page aligned", - query_hca_rblock); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_QUERY_HCA, - adapter_handle.handle, /* r4 */ - r_cb, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count) -{ - return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, - adapter_handle.handle, /* r4 */ - (u64)queue_type | ((u64)pagesize) << 8, - /* r5 */ - resource_handle, /* r6 */ - logical_address_of_page, /* r7 */ - count, /* r8 */ - 0, 0); -} - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - if (count != 1) { - ehca_gen_err("Ppage counter=%llx", count); - return H_PARAMETER; - } - return hipz_h_register_rpage(adapter_handle, - pagesize, - queue_type, - eq_handle.handle, - logical_address_of_page, count); -} - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, - u32 ist) -{ - u64 ret; - ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, - adapter_handle.handle, /* r4 */ - ist, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret != H_SUCCESS && ret != H_BUSY) - ehca_gen_err("Could not query interrupt state."); - - return ret; -} - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal) -{ - if (count != 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - cq_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa) -{ - if (count > 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - qp_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe2processed, - void **log_addr_next_rq_wqe2processed, - int dis_and_get_function_code) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - dis_and_get_function_code, /* r5 */ - qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void *)outs[0]; - if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void *)outs[1]; - - return ret; -} - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - update_mask, /* r6 */ - __pa(mqpcb), /* r7 */ - 0, 0, 0, 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Insufficient resources ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal) -{ - return ehca_plpar_hcall_norets(H_QUERY_QP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - __pa(qqpcb), /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = hcp_galpas_dtor(&qp->galpas); - if (ret) { - ehca_gen_err("Could not destruct qp->galpas"); - return H_RESOURCE; - } - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - /* function code */ - 1, /* r5 */ - qp->ipz_qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if (ret == H_HARDWARE) - ehca_gen_err("HCA not operational. ret=%lli", ret); - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - qp->ipz_qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource still in use. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port) -{ - return ehca_plpar_hcall_norets(H_DEFINE_AQP0, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0, 0, 0); - *pma_qp_nr = (u32)outs[0]; - *bma_qp_nr = (u32)outs[1]; - - if (ret == H_ALIAS_EXIST) - ehca_gen_err("AQP1 already exists. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - u64 ret; - - ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - return ehca_plpar_hcall_norets(H_DETACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); -} - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag) -{ - u64 ret; - - ret = hcp_galpas_dtor(&cq->galpas); - if (ret) { - ehca_gen_err("Could not destruct cp->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - force_flag != 0 ? 1L : 0L, /* r6 */ - 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq) -{ - u64 ret; - - ret = hcp_galpas_dtor(&eq->galpas); - if (ret) { - ehca_gen_err("Could not destruct eq->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - eq->ipz_eq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource in use. ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 5, /* r5 */ - vaddr, /* r6 */ - length, /* r7 */ - (((u64)access_ctrl) << 32ULL), /* r8 */ - pd.value, /* r9 */ - 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - u64 ret; - - if (unlikely(ehca_debug_level >= 3)) { - if (count > 1) { - u64 *kpage; - int i; - kpage = __va(logical_address_of_page); - for (i = 0; i < count; i++) - ehca_gen_dbg("kpage[%d]=%p", - i, (void *)kpage[i]); - } else - ehca_gen_dbg("kpage=%p", - (void *)logical_address_of_page); - } - - if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { - ehca_gen_err("logical_address_of_page not on a 4k boundary " - "adapter_handle=%llx mr=%p mr_handle=%llx " - "pagesize=%x queue_type=%x " - "logical_address_of_page=%llx count=%llx", - adapter_handle.handle, mr, - mr->ipz_mr_handle.handle, pagesize, queue_type, - logical_address_of_page, count); - ret = H_PARAMETER; - } else - ret = hipz_h_register_rpage(adapter_handle, pagesize, - queue_type, - mr->ipz_mr_handle.handle, - logical_address_of_page, count); - return ret; -} - -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MR, outs, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->len = outs[0]; - outparms->vaddr = outs[1]; - outparms->acl = outs[4] >> 32; - outparms->lkey = (u32)(outs[5] >> 32); - outparms->rkey = (u32)(outs[5] & (0xffffffff)); - - return ret; -} - -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - length, /* r7 */ - /* r8 */ - ((((u64)access_ctrl) << 32ULL) | pd.value), - mr_addr_cb, /* r9 */ - 0, 0, 0); - outparms->vaddr = outs[1]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, - adapter_handle.handle, /* r4 */ - orig_mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - (((u64)access_ctrl) << 32ULL), /* r7 */ - pd.value, /* r8 */ - 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd pd, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 6, /* r5 */ - pd.value, /* r6 */ - 0, 0, 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MW, outs, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count) -{ - u64 r_cb = __pa(rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("rblock not page aligned."); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_ERROR_DATA, - adapter_handle.handle, - ressource_handle, - r_cb, - 0, 0, 0, 0); -} - -u64 hipz_h_eoi(int irq) -{ - unsigned long xirr; - - iosync(); - xirr = (0xffULL << 24) | irq; - - return plpar_hcall_norets(H_EOI, xirr); -} diff --git a/kernel/drivers/infiniband/hw/ehca/hcp_if.h b/kernel/drivers/infiniband/hw/ehca/hcp_if.h deleted file mode 100644 index a46e514c3..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hcp_if.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Gerd Bayer <gerd.bayer@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HCP_IF_H__ -#define __HCP_IF_H__ - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "hipz_hw.h" - -/* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize - * resources, create the empty EQPT (ring). - */ -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 * act_nr_of_entries, - u32 * act_pages, - u32 * eq_ist); - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask); -/* - * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize - * resources, create the empty CQPT (ring). - */ -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param); - - -/* - * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, - * initialize resources, create empty QPPTs (2 rings). - */ -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user); - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block); - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask); - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock); - -/* - * hipz_h_register_rpage internal function in hcp_if.h for all - * hcp_H_REGISTER_RPAGE calls. - */ -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count); - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle - hcp_adapter_handle, - u32 ist); - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal); - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa); - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe_tb_processed, - void **log_addr_next_rq_wqe_tb_processed, - int dis_and_get_function_code); -enum hcall_sigt { - HCALL_SIGT_NO_CQE = 0, - HCALL_SIGT_BY_WQE = 1, - HCALL_SIGT_EVERY = 2 -}; - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal); - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal); - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp); - -u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port); - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr); - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag); - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq); - -/* - * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize - * resources. - */ -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -/* hipz_h_query_mr queries MR in HW and FW */ -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_free_resource_mr frees MR resources in HW and FW */ -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr); - -/* hipz_h_reregister_pmr reregisters MR in HW and FW */ -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_smr register shared MR in HW and FW */ -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* - * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize - * resources. - */ -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd pd, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_query_mw queries MW in HW and FW */ -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_free_resource_mw frees MW resources in HW and FW */ -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw); - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count); -u64 hipz_h_eoi(int irq); - -#endif /* __HCP_IF_H__ */ diff --git a/kernel/drivers/infiniband/hw/ehca/hcp_phyp.c b/kernel/drivers/infiniband/hw/ehca/hcp_phyp.c deleted file mode 100644 index 077376ff3..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hcp_phyp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * load store abstraction for ehca register access with tracing - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -u64 hcall_map_page(u64 physaddr) -{ - return (u64)ioremap(physaddr, EHCA_PAGESIZE); -} - -int hcall_unmap_page(u64 mapaddr) -{ - iounmap((volatile void __iomem *) mapaddr); - return 0; -} - -int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, - u64 paddr_kernel, u64 paddr_user) -{ - if (!is_user) { - galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); - if (!galpas->kernel.fw_handle) - return -ENOMEM; - } else - galpas->kernel.fw_handle = 0; - - galpas->user.fw_handle = paddr_user; - - return 0; -} - -int hcp_galpas_dtor(struct h_galpas *galpas) -{ - if (galpas->kernel.fw_handle) { - int ret = hcall_unmap_page(galpas->kernel.fw_handle); - if (ret) - return ret; - } - - galpas->user.fw_handle = galpas->kernel.fw_handle = 0; - - return 0; -} diff --git a/kernel/drivers/infiniband/hw/ehca/hcp_phyp.h b/kernel/drivers/infiniband/hw/ehca/hcp_phyp.h deleted file mode 100644 index d1b029910..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hcp_phyp.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware calls - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Waleri Fomin <fomin@de.ibm.com> - * Gerd Bayer <gerd.bayer@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HCP_PHYP_H__ -#define __HCP_PHYP_H__ - - -/* - * eHCA page (mapped into memory) - * resource to access eHCA register pages in CPU address space -*/ -struct h_galpa { - u64 fw_handle; - /* for pSeries this is a 64bit memory address where - I/O memory is mapped into CPU address space (kv) */ -}; - -/* - * resource to access eHCA address space registers, all types - */ -struct h_galpas { - u32 pid; /*PID of userspace galpa checking */ - struct h_galpa user; /* user space accessible resource, - set to 0 if unused */ - struct h_galpa kernel; /* kernel space accessible resource, - set to 0 if unused */ -}; - -static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) -{ - u64 addr = galpa.fw_handle + offset; - return *(volatile u64 __force *)addr; -} - -static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) -{ - u64 addr = galpa.fw_handle + offset; - *(volatile u64 __force *)addr = value; -} - -int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, - u64 paddr_kernel, u64 paddr_user); - -int hcp_galpas_dtor(struct h_galpas *galpas); - -u64 hcall_map_page(u64 physaddr); - -int hcall_unmap_page(u64 mapaddr); - -#endif diff --git a/kernel/drivers/infiniband/hw/ehca/hipz_fns.h b/kernel/drivers/infiniband/hw/ehca/hipz_fns.h deleted file mode 100644 index 9dac93d02..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hipz_fns.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_FNS_H__ -#define __HIPZ_FNS_H__ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -#include "hipz_fns_core.h" - -#define hipz_galpa_store_eq(gal, offset, value) \ - hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_eq(gal, offset) \ - hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qped(gal, offset, value) \ - hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) - -#define hipz_galpa_load_qped(gal, offset) \ - hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) - -#define hipz_galpa_store_mrmw(gal, offset, value) \ - hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) - -#define hipz_galpa_load_mrmw(gal, offset) \ - hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) - -#endif diff --git a/kernel/drivers/infiniband/hw/ehca/hipz_fns_core.h b/kernel/drivers/infiniband/hw/ehca/hipz_fns_core.h deleted file mode 100644 index 868735fd3..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch <raisch@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_FNS_CORE_H__ -#define __HIPZ_FNS_CORE_H__ - -#include "hcp_phyp.h" -#include "hipz_hw.h" - -#define hipz_galpa_store_cq(gal, offset, value) \ - hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_cq(gal, offset) \ - hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qp(gal, offset, value) \ - hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) -#define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) - -static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, - EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); -} - -static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, - EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); -} - -static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) -{ - hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, - EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); -} - -static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n0_reg; - - hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, - EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, - value)); - cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); -} - -static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n1_reg; - - hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, - EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); - cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); -} - -#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/kernel/drivers/infiniband/hw/ehca/hipz_hw.h b/kernel/drivers/infiniband/hw/ehca/hipz_hw.h deleted file mode 100644 index bf996c7ac..000000000 --- a/kernel/drivers/infiniband/hw/ehca/hipz_hw.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * eHCA register definitions - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_HW_H__ -#define __HIPZ_HW_H__ - -#include "ehca_tools.h" - -#define EHCA_MAX_MTU 4 - -/* QP Table Entry Memory Map */ -struct hipz_qptemm { - u64 qpx_hcr; - u64 qpx_c; - u64 qpx_herr; - u64 qpx_aer; -/* 0x20*/ - u64 qpx_sqa; - u64 qpx_sqc; - u64 qpx_rqa; - u64 qpx_rqc; -/* 0x40*/ - u64 qpx_st; - u64 qpx_pmstate; - u64 qpx_pmfa; - u64 qpx_pkey; -/* 0x60*/ - u64 qpx_pkeya; - u64 qpx_pkeyb; - u64 qpx_pkeyc; - u64 qpx_pkeyd; -/* 0x80*/ - u64 qpx_qkey; - u64 qpx_dqp; - u64 qpx_dlidp; - u64 qpx_portp; -/* 0xa0*/ - u64 qpx_slidp; - u64 qpx_slidpp; - u64 qpx_dlida; - u64 qpx_porta; -/* 0xc0*/ - u64 qpx_slida; - u64 qpx_slidpa; - u64 qpx_slvl; - u64 qpx_ipd; -/* 0xe0*/ - u64 qpx_mtu; - u64 qpx_lato; - u64 qpx_rlimit; - u64 qpx_rnrlimit; -/* 0x100*/ - u64 qpx_t; - u64 qpx_sqhp; - u64 qpx_sqptp; - u64 qpx_nspsn; -/* 0x120*/ - u64 qpx_nspsnhwm; - u64 reserved1; - u64 qpx_sdsi; - u64 qpx_sdsbc; -/* 0x140*/ - u64 qpx_sqwsize; - u64 qpx_sqwts; - u64 qpx_lsn; - u64 qpx_nssn; -/* 0x160 */ - u64 qpx_mor; - u64 qpx_cor; - u64 qpx_sqsize; - u64 qpx_erc; -/* 0x180*/ - u64 qpx_rnrrc; - u64 qpx_ernrwt; - u64 qpx_rnrresp; - u64 qpx_lmsna; -/* 0x1a0 */ - u64 qpx_sqhpc; - u64 qpx_sqcptp; - u64 qpx_sigt; - u64 qpx_wqecnt; -/* 0x1c0*/ - u64 qpx_rqhp; - u64 qpx_rqptp; - u64 qpx_rqsize; - u64 qpx_nrr; -/* 0x1e0*/ - u64 qpx_rdmac; - u64 qpx_nrpsn; - u64 qpx_lapsn; - u64 qpx_lcr; -/* 0x200*/ - u64 qpx_rwc; - u64 qpx_rwva; - u64 qpx_rdsi; - u64 qpx_rdsbc; -/* 0x220*/ - u64 qpx_rqwsize; - u64 qpx_crmsn; - u64 qpx_rdd; - u64 qpx_larpsn; -/* 0x240*/ - u64 qpx_pd; - u64 qpx_scqn; - u64 qpx_rcqn; - u64 qpx_aeqn; -/* 0x260*/ - u64 qpx_aaelog; - u64 qpx_ram; - u64 qpx_rdmaqe0; - u64 qpx_rdmaqe1; -/* 0x280*/ - u64 qpx_rdmaqe2; - u64 qpx_rdmaqe3; - u64 qpx_nrpsnhwm; -/* 0x298*/ - u64 reserved[(0x400 - 0x298) / 8]; -/* 0x400 extended data */ - u64 reserved_ext[(0x500 - 0x400) / 8]; -/* 0x500 */ - u64 reserved2[(0x1000 - 0x500) / 8]; -/* 0x1000 */ -}; - -#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) - -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) - -/* MRMWPT Entry Memory Map */ -struct hipz_mrmwmm { - /* 0x00 */ - u64 mrx_hcr; - - u64 mrx_c; - u64 mrx_herr; - u64 mrx_aer; - /* 0x20 */ - u64 mrx_pp; - u64 reserved1; - u64 reserved2; - u64 reserved3; - /* 0x40 */ - u64 reserved4[(0x200 - 0x40) / 8]; - /* 0x200 */ - u64 mrx_ctl[64]; - -}; - -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) - -struct hipz_qpedmm { - /* 0x00 */ - u64 reserved0[(0x400) / 8]; - /* 0x400 */ - u64 qpedx_phh; - u64 qpedx_ppsgp; - /* 0x410 */ - u64 qpedx_ppsgu; - u64 qpedx_ppdgp; - /* 0x420 */ - u64 qpedx_ppdgu; - u64 qpedx_aph; - /* 0x430 */ - u64 qpedx_apsgp; - u64 qpedx_apsgu; - /* 0x440 */ - u64 qpedx_apdgp; - u64 qpedx_apdgu; - /* 0x450 */ - u64 qpedx_apav; - u64 qpedx_apsav; - /* 0x460 */ - u64 qpedx_hcr; - u64 reserved1[4]; - /* 0x488 */ - u64 qpedx_rrl0; - /* 0x490 */ - u64 qpedx_rrrkey0; - u64 qpedx_rrva0; - /* 0x4a0 */ - u64 reserved2; - u64 qpedx_rrl1; - /* 0x4b0 */ - u64 qpedx_rrrkey1; - u64 qpedx_rrva1; - /* 0x4c0 */ - u64 reserved3; - u64 qpedx_rrl2; - /* 0x4d0 */ - u64 qpedx_rrrkey2; - u64 qpedx_rrva2; - /* 0x4e0 */ - u64 reserved4; - u64 qpedx_rrl3; - /* 0x4f0 */ - u64 qpedx_rrrkey3; - u64 qpedx_rrva3; -}; - -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) - -/* CQ Table Entry Memory Map */ -struct hipz_cqtemm { - u64 cqx_hcr; - u64 cqx_c; - u64 cqx_herr; - u64 cqx_aer; -/* 0x20 */ - u64 cqx_ptp; - u64 cqx_tp; - u64 cqx_fec; - u64 cqx_feca; -/* 0x40 */ - u64 cqx_ep; - u64 cqx_eq; -/* 0x50 */ - u64 reserved1; - u64 cqx_n0; -/* 0x60 */ - u64 cqx_n1; - u64 reserved2[(0x1000 - 0x60) / 8]; -/* 0x1000 */ -}; - -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) -#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) - -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) - -/* EQ Table Entry Memory Map */ -struct hipz_eqtemm { - u64 eqx_hcr; - u64 eqx_c; - - u64 eqx_herr; - u64 eqx_aer; -/* 0x20 */ - u64 eqx_ptp; - u64 eqx_tp; - u64 eqx_ssba; - u64 eqx_psba; - -/* 0x40 */ - u64 eqx_cec; - u64 eqx_meql; - u64 eqx_xisbi; - u64 eqx_xisc; -/* 0x60 */ - u64 eqx_it; - -}; - -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) - -/* access control defines for MR/MW */ -#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 -#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 -#define HIPZ_ACCESSCTRL_R_READ 0x00200000 -#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 -#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 - -/* query hca response block */ -struct hipz_query_hca { - u32 cur_reliable_dg; - u32 cur_qp; - u32 cur_cq; - u32 cur_eq; - u32 cur_mr; - u32 cur_mw; - u32 cur_ee_context; - u32 cur_mcast_grp; - u32 cur_qp_attached_mcast_grp; - u32 reserved1; - u32 cur_ipv6_qp; - u32 cur_eth_qp; - u32 cur_hp_mr; - u32 reserved2[3]; - u32 max_rd_domain; - u32 max_qp; - u32 max_cq; - u32 max_eq; - u32 max_mr; - u32 max_hp_mr; - u32 max_mw; - u32 max_mrwpte; - u32 max_special_mrwpte; - u32 max_rd_ee_context; - u32 max_mcast_grp; - u32 max_total_mcast_qp_attach; - u32 max_mcast_qp_attach; - u32 max_raw_ipv6_qp; - u32 max_raw_ethy_qp; - u32 internal_clock_frequency; - u32 max_pd; - u32 max_ah; - u32 max_cqe; - u32 max_wqes_wq; - u32 max_partitions; - u32 max_rr_ee_context; - u32 max_rr_qp; - u32 max_rr_hca; - u32 max_act_wqs_ee_context; - u32 max_act_wqs_qp; - u32 max_sge; - u32 max_sge_rd; - u32 memory_page_size_supported; - u64 max_mr_size; - u32 local_ca_ack_delay; - u32 num_ports; - u32 vendor_id; - u32 vendor_part_id; - u32 hw_ver; - u64 node_guid; - u64 hca_cap_indicators; - u32 data_counter_register_size; - u32 max_shared_rq; - u32 max_isns_eq; - u32 max_neq; -} __attribute__ ((packed)); - -#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) -#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) -#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) -#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) -#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) -#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) -#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) -#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) -#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) -#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) -#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) -#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) -#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) -#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) -#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) -#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) -#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) -#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) - -/* query port response block */ -struct hipz_query_port { - u32 state; - u32 bad_pkey_cntr; - u32 lmc; - u32 lid; - u32 subnet_timeout; - u32 qkey_viol_cntr; - u32 sm_sl; - u32 sm_lid; - u32 capability_mask; - u32 init_type_reply; - u32 pkey_tbl_len; - u32 gid_tbl_len; - u64 gid_prefix; - u32 port_nr; - u16 pkey_entries[16]; - u8 reserved1[32]; - u32 trent_size; - u32 trbuf_size; - u64 max_msg_sz; - u32 max_mtu; - u32 vl_cap; - u32 phys_pstate; - u32 phys_state; - u32 phys_speed; - u32 phys_width; - u8 reserved2[1884]; - u64 guid_entries[255]; -} __attribute__ ((packed)); - -#endif diff --git a/kernel/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/kernel/drivers/infiniband/hw/ehca/ipz_pt_fn.c deleted file mode 100644 index 8d594517c..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/slab.h> - -#include "ehca_tools.h" -#include "ipz_pt_fn.h" -#include "ehca_classes.h" - -#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) - -struct kmem_cache *small_qp_cache; - -void *ipz_qpageit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->pagesize; - if (queue->current_q_offset > queue->queue_length) { - queue->current_q_offset -= queue->pagesize; - ret = NULL; - } - if (((u64)ret) % queue->pagesize) { - ehca_gen_err("ERROR!! not at PAGE-Boundary"); - return NULL; - } - return ret; -} - -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u64 last_entry_in_q = queue->queue_length - queue->qe_size; - - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset > last_entry_in_q) { - queue->current_q_offset = 0; - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) -{ - int i; - for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = __pa(queue->queue_pages[i]); - if (addr >= page && addr < page + queue->pagesize) { - *q_offset = addr - page + i * queue->pagesize; - return 0; - } - } - return -EINVAL; -} - -#if PAGE_SHIFT < EHCA_PAGESHIFT -#error Kernel pages must be at least as large than eHCA pages (4K) ! -#endif - -/* - * allocate pages for queue: - * outer loop allocates whole kernel pages (page aligned) and - * inner loop divides a kernel page into smaller hca queue pages - */ -static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) -{ - int k, f = 0; - u8 *kpage; - - while (f < nr_of_pages) { - kpage = (u8 *)get_zeroed_page(GFP_KERNEL); - if (!kpage) - goto out; - - for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { - queue->queue_pages[f] = (struct ipz_page *)kpage; - kpage += EHCA_PAGESIZE; - f++; - } - } - return 1; - -out: - for (f = 0; f < nr_of_pages && queue->queue_pages[f]; - f += PAGES_PER_KPAGE) - free_page((unsigned long)(queue->queue_pages)[f]); - return 0; -} - -static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page; - unsigned long bit; - - mutex_lock(&pd->lock); - - if (!list_empty(&pd->free[order])) - page = list_entry(pd->free[order].next, - struct ipz_small_queue_page, list); - else { - page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); - if (!page) - goto out; - - page->page = get_zeroed_page(GFP_KERNEL); - if (!page->page) { - kmem_cache_free(small_qp_cache, page); - goto out; - } - - list_add(&page->list, &pd->free[order]); - } - - bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); - __set_bit(bit, page->bitmap); - page->fill++; - - if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) - list_move(&page->list, &pd->full[order]); - - mutex_unlock(&pd->lock); - - queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); - queue->small_page = page; - queue->offset = bit << (order + 9); - return 1; - -out: - ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); - mutex_unlock(&pd->lock); - return 0; -} - -static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page = queue->small_page; - unsigned long bit; - int free_page = 0; - - bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) - >> (order + 9); - - mutex_lock(&pd->lock); - - __clear_bit(bit, page->bitmap); - page->fill--; - - if (page->fill == 0) { - list_del(&page->list); - free_page = 1; - } - - if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) - /* the page was full until we freed the chunk */ - list_move_tail(&page->list, &pd->free[order]); - - mutex_unlock(&pd->lock); - - if (free_page) { - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } -} - -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small) -{ - if (pagesize > PAGE_SIZE) { - ehca_gen_err("FATAL ERROR: pagesize=%x " - "is greater than kernel page size", pagesize); - return 0; - } - - /* init queue fields */ - queue->queue_length = nr_of_pages * pagesize; - queue->pagesize = pagesize; - queue->qe_size = qe_size; - queue->act_nr_of_sg = nr_of_sg; - queue->current_q_offset = 0; - queue->toggle_state = 1; - queue->small_page = NULL; - - /* allocate queue page pointers */ - queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), - GFP_KERNEL | __GFP_NOWARN); - if (!queue->queue_pages) { - queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); - if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; - } - } - - /* allocate actual queue pages */ - if (is_small) { - if (!alloc_small_queue_page(queue, pd)) - goto ipz_queue_ctor_exit0; - } else - if (!alloc_queue_pages(queue, nr_of_pages)) - goto ipz_queue_ctor_exit0; - - return 1; - -ipz_queue_ctor_exit0: - ehca_gen_err("Couldn't alloc pages queue=%p " - "nr_of_pages=%x", queue, nr_of_pages); - if (is_vmalloc_addr(queue->queue_pages)) - vfree(queue->queue_pages); - else - kfree(queue->queue_pages); - - return 0; -} - -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) -{ - int i, nr_pages; - - if (!queue || !queue->queue_pages) { - ehca_gen_dbg("queue or queue_pages is NULL"); - return 0; - } - - if (queue->small_page) - free_small_queue_page(queue, pd); - else { - nr_pages = queue->queue_length / queue->pagesize; - for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) - free_page((unsigned long)queue->queue_pages[i]); - } - - if (is_vmalloc_addr(queue->queue_pages)) - vfree(queue->queue_pages); - else - kfree(queue->queue_pages); - - return 1; -} - -int ehca_init_small_qp_cache(void) -{ - small_qp_cache = kmem_cache_create("ehca_cache_small_qp", - sizeof(struct ipz_small_queue_page), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!small_qp_cache) - return -ENOMEM; - - return 0; -} - -void ehca_cleanup_small_qp_cache(void) -{ - kmem_cache_destroy(small_qp_cache); -} diff --git a/kernel/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/kernel/drivers/infiniband/hw/ehca/ipz_pt_fn.h deleted file mode 100644 index a801274ea..000000000 --- a/kernel/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Reinhard Ernst <rernst@de.ibm.com> - * Christoph Raisch <raisch@de.ibm.com> - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __IPZ_PT_FN_H__ -#define __IPZ_PT_FN_H__ - -#define EHCA_PAGESHIFT 12 -#define EHCA_PAGESIZE 4096UL -#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) -#define EHCA_PT_ENTRIES 512UL - -#include "ehca_tools.h" -#include "ehca_qes.h" - -struct ehca_pd; -struct ipz_small_queue_page; - -extern struct kmem_cache *small_qp_cache; - -/* struct generic ehca page */ -struct ipz_page { - u8 entries[EHCA_PAGESIZE]; -}; - -#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) - -struct ipz_small_queue_page { - unsigned long page; - unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; - int fill; - void *mapped_addr; - u32 mmap_count; - struct list_head list; -}; - -/* struct generic queue in linux kernel virtual memory (kv) */ -struct ipz_queue { - u64 current_q_offset; /* current queue entry */ - - struct ipz_page **queue_pages; /* array of pages belonging to queue */ - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; /* toggle flag - per page */ - u32 offset; /* save offset within page for small_qp */ - struct ipz_small_queue_page *small_page; -}; - -/* - * return current Queue Entry for a certain q_offset - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) -{ - struct ipz_page *current_page; - if (q_offset >= queue->queue_length) - return NULL; - current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; - return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; -} - -/* - * return current Queue Entry - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_get(struct ipz_queue *queue) -{ - return ipz_qeit_calc(queue, queue->current_q_offset); -} - -/* - * return current Queue Page , increment Queue Page iterator from - * page to page in struct ipz_queue, last increment will return 0! and - * NOT wrap - * returns address (kv) of Queue Page - * warning don't use in parallel with ipz_QE_get_inc() - */ -void *ipz_qpageit_get_inc(struct ipz_queue *queue); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset >= queue->queue_length) { - queue->current_q_offset = 0; - /* toggle the valid flag */ - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -/* - * return a bool indicating whether current Queue Entry is valid - */ -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); -} - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) -{ - return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL; -} - -/* - * returns and resets Queue Entry iterator - * returns address (kv) of first Queue Entry - */ -static inline void *ipz_qeit_reset(struct ipz_queue *queue) -{ - queue->current_q_offset = 0; - return ipz_qeit_get(queue); -} - -/* - * return the q_offset corresponding to an absolute address - */ -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); - -/* - * return the next queue offset. don't modify the queue. - */ -static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) -{ - offset += queue->qe_size; - if (offset >= queue->queue_length) offset = 0; - return offset; -} - -/* struct generic page table */ -struct ipz_pt { - u64 entries[EHCA_PT_ENTRIES]; -}; - -/* struct page table for a queue, only to be used in pf */ -struct ipz_qpt { - /* queue page tables (kv), use u64 because we know the element length */ - u64 *qpts; - u32 n_qpts; - u32 n_ptes; /* number of page table entries */ - u64 *current_pte_addr; -}; - -/* - * constructor for a ipz_queue_t, placement new for ipz_queue_t, - * new for all dependent datastructors - * all QP Tables are the same - * flow: - * allocate+pin queue - * see ipz_qpt_ctor() - * returns true if ok, false if out of memory - */ -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small); - -/* - * destructor for a ipz_queue_t - * -# free queue - * see ipz_queue_ctor() - * returns true if ok, false if queue was NULL-ptr of free failed - */ -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); - -/* - * constructor for a ipz_qpt_t, - * placement new for struct ipz_queue, new for all dependent datastructors - * all QP Tables are the same, - * flow: - * -# allocate+pin queue - * -# initialise ptcb - * -# allocate+pin PTs - * -# link PTs to a ring, according to HCA Arch, set bit62 id needed - * -# the ring must have room for exactly nr_of_PTEs - * see ipz_qpt_ctor() - */ -void ipz_qpt_ctor(struct ipz_qpt *qpt, - const u32 nr_of_qes, - const u32 pagesize, - const u32 qe_size, - const u8 lowbyte, const u8 toggle, - u32 * act_nr_of_QEs, u32 * act_nr_of_pages); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - * fix EQ page problems - */ -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); - -/* - * return current Event Queue Entry, increment Queue Entry iterator - * by one step in struct ipz_queue if valid, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_queue_QPageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - */ -static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - ipz_qeit_eq_get_inc(queue); /* this is a good one */ - return ret; -} - -static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - return ret; -} - -/* returns address (GX) of first queue entry */ -static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) -{ - return be64_to_cpu(qpt->qpts[0]); -} - -/* returns address (kv) of first page of queue page table */ -static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) -{ - return qpt->qpts; -} - -#endif /* __IPZ_PT_FN_H__ */ diff --git a/kernel/drivers/infiniband/hw/ipath/Kconfig b/kernel/drivers/infiniband/hw/ipath/Kconfig deleted file mode 100644 index 1d9bb115c..000000000 --- a/kernel/drivers/infiniband/hw/ipath/Kconfig +++ /dev/null @@ -1,11 +0,0 @@ -config INFINIBAND_IPATH - tristate "QLogic HTX HCA support" - depends on 64BIT && NET && HT_IRQ - ---help--- - This is a driver for the obsolete QLogic Hyper-Transport - IB host channel adapter (model QHT7140), - including InfiniBand verbs support. This driver allows these - devices to be used with both kernel upper level protocols such - as IP-over-InfiniBand as well as with userspace applications - (in conjunction with InfiniBand userspace access). - For QLogic PCIe QLE based cards, use the QIB driver instead. diff --git a/kernel/drivers/infiniband/hw/ipath/Makefile b/kernel/drivers/infiniband/hw/ipath/Makefile deleted file mode 100644 index 4496f2820..000000000 --- a/kernel/drivers/infiniband/hw/ipath/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \ - -DIPATH_KERN_TYPE=0 - -obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o - -ib_ipath-y := \ - ipath_cq.o \ - ipath_diag.o \ - ipath_dma.o \ - ipath_driver.o \ - ipath_eeprom.o \ - ipath_file_ops.o \ - ipath_fs.o \ - ipath_init_chip.o \ - ipath_intr.o \ - ipath_keys.o \ - ipath_mad.o \ - ipath_mmap.o \ - ipath_mr.o \ - ipath_qp.o \ - ipath_rc.o \ - ipath_ruc.o \ - ipath_sdma.o \ - ipath_srq.o \ - ipath_stats.o \ - ipath_sysfs.o \ - ipath_uc.o \ - ipath_ud.o \ - ipath_user_pages.o \ - ipath_user_sdma.o \ - ipath_verbs_mcast.o \ - ipath_verbs.o - -ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o - -ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o -ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_common.h b/kernel/drivers/infiniband/hw/ipath/ipath_common.h deleted file mode 100644 index 28cfe97cf..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_common.h +++ /dev/null @@ -1,851 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_COMMON_H -#define _IPATH_COMMON_H - -/* - * This file contains defines, structures, etc. that are used - * to communicate between kernel and user code. - */ - - -/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */ -#define IPATH_SRC_OUI_1 0x00 -#define IPATH_SRC_OUI_2 0x11 -#define IPATH_SRC_OUI_3 0x75 - -/* version of protocol header (known to chip also). In the long run, - * we should be able to generate and accept a range of version numbers; - * for now we only accept one, and it's compiled in. - */ -#define IPS_PROTO_VERSION 2 - -/* - * These are compile time constants that you may want to enable or disable - * if you are trying to debug problems with code or performance. - * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in - * fastpath code - * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be - * traced in faspath code - * _IPATH_TRACING define as 0 if you want to remove all tracing in a - * compilation unit - * _IPATH_DEBUGGING define as 0 if you want to remove debug prints - */ - -/* - * The value in the BTH QP field that InfiniPath uses to differentiate - * an infinipath protocol IB packet vs standard IB transport - */ -#define IPATH_KD_QP 0x656b79 - -/* - * valid states passed to ipath_set_linkstate() user call - */ -#define IPATH_IB_LINKDOWN 0 -#define IPATH_IB_LINKARM 1 -#define IPATH_IB_LINKACTIVE 2 -#define IPATH_IB_LINKDOWN_ONLY 3 -#define IPATH_IB_LINKDOWN_SLEEP 4 -#define IPATH_IB_LINKDOWN_DISABLE 5 -#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ -#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ -#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */ -#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */ - -/* - * These 3 values (SDR and DDR may be ORed for auto-speed - * negotiation) are used for the 3rd argument to path_f_set_ib_cfg - * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They - * are also the the possible values for ipath_link_speed_enabled and active - * The values were chosen to match values used within the IB spec. - */ -#define IPATH_IB_SDR 1 -#define IPATH_IB_DDR 2 - -/* - * stats maintained by the driver. For now, at least, this is global - * to all minor devices. - */ -struct infinipath_stats { - /* number of interrupts taken */ - __u64 sps_ints; - /* number of interrupts for errors */ - __u64 sps_errints; - /* number of errors from chip (not incl. packet errors or CRC) */ - __u64 sps_errs; - /* number of packet errors from chip other than CRC */ - __u64 sps_pkterrs; - /* number of packets with CRC errors (ICRC and VCRC) */ - __u64 sps_crcerrs; - /* number of hardware errors reported (parity, etc.) */ - __u64 sps_hwerrs; - /* number of times IB link changed state unexpectedly */ - __u64 sps_iblink; - __u64 sps_unused; /* was fastrcvint, no longer implemented */ - /* number of kernel (port0) packets received */ - __u64 sps_port0pkts; - /* number of "ethernet" packets sent by driver */ - __u64 sps_ether_spkts; - /* number of "ethernet" packets received by driver */ - __u64 sps_ether_rpkts; - /* number of SMA packets sent by driver. Obsolete. */ - __u64 sps_sma_spkts; - /* number of SMA packets received by driver. Obsolete. */ - __u64 sps_sma_rpkts; - /* number of times all ports rcvhdrq was full and packet dropped */ - __u64 sps_hdrqfull; - /* number of times all ports egrtid was full and packet dropped */ - __u64 sps_etidfull; - /* - * number of times we tried to send from driver, but no pio buffers - * avail - */ - __u64 sps_nopiobufs; - /* number of ports currently open */ - __u64 sps_ports; - /* list of pkeys (other than default) accepted (0 means not set) */ - __u16 sps_pkeys[4]; - __u16 sps_unused16[4]; /* available; maintaining compatible layout */ - /* number of user ports per chip (not IB ports) */ - __u32 sps_nports; - /* not our interrupt, or already handled */ - __u32 sps_nullintr; - /* max number of packets handled per receive call */ - __u32 sps_maxpkts_call; - /* avg number of packets handled per receive call */ - __u32 sps_avgpkts_call; - /* total number of pages locked */ - __u64 sps_pagelocks; - /* total number of pages unlocked */ - __u64 sps_pageunlocks; - /* - * Number of packets dropped in kernel other than errors (ether - * packets if ipath not configured, etc.) - */ - __u64 sps_krdrops; - __u64 sps_txeparity; /* PIO buffer parity error, recovered */ - /* pad for future growth */ - __u64 __sps_pad[45]; -}; - -/* - * These are the status bits readable (in ascii form, 64bit value) - * from the "status" sysfs file. - */ -#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */ -#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ -/* Device has been disabled via admin request */ -#define IPATH_STATUS_ADMIN_DISABLED 0x4 -/* Chip has been found and initted */ -#define IPATH_STATUS_CHIP_PRESENT 0x20 -/* IB link is at ACTIVE, usable for data traffic */ -#define IPATH_STATUS_IB_READY 0x40 -/* link is configured, LID, MTU, etc. have been set */ -#define IPATH_STATUS_IB_CONF 0x80 -/* no link established, probably no cable */ -#define IPATH_STATUS_IB_NOCABLE 0x100 -/* A Fatal hardware error has occurred. */ -#define IPATH_STATUS_HWERROR 0x200 - -/* - * The list of usermode accessible registers. Also see Reg_* later in file. - */ -typedef enum _ipath_ureg { - /* (RO) DMA RcvHdr to be used next. */ - ur_rcvhdrtail = 0, - /* (RW) RcvHdr entry to be processed next by host. */ - ur_rcvhdrhead = 1, - /* (RO) Index of next Eager index to use. */ - ur_rcvegrindextail = 2, - /* (RW) Eager TID to be processed next */ - ur_rcvegrindexhead = 3, - /* For internal use only; max register number. */ - _IPATH_UregMax -} ipath_ureg; - -/* bit values for spi_runtime_flags */ -#define IPATH_RUNTIME_HT 0x1 -#define IPATH_RUNTIME_PCIE 0x2 -#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 -#define IPATH_RUNTIME_RCVHDR_COPY 0x8 -#define IPATH_RUNTIME_MASTER 0x10 -#define IPATH_RUNTIME_NODMA_RTAIL 0x80 -#define IPATH_RUNTIME_SDMA 0x200 -#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400 -#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800 - -/* - * This structure is returned by ipath_userinit() immediately after - * open to get implementation-specific info, and info specific to this - * instance. - * - * This struct must have explict pad fields where type sizes - * may result in different alignments between 32 and 64 bit - * programs, since the 64 bit * bit kernel requires the user code - * to have matching offsets - */ -struct ipath_base_info { - /* version of hardware, for feature checking. */ - __u32 spi_hw_version; - /* version of software, for feature checking. */ - __u32 spi_sw_version; - /* InfiniPath port assigned, goes into sent packets */ - __u16 spi_port; - __u16 spi_subport; - /* - * IB MTU, packets IB data must be less than this. - * The MTU is in bytes, and will be a multiple of 4 bytes. - */ - __u32 spi_mtu; - /* - * Size of a PIO buffer. Any given packet's total size must be less - * than this (in words). Included is the starting control word, so - * if 513 is returned, then total pkt size is 512 words or less. - */ - __u32 spi_piosize; - /* size of the TID cache in infinipath, in entries */ - __u32 spi_tidcnt; - /* size of the TID Eager list in infinipath, in entries */ - __u32 spi_tidegrcnt; - /* size of a single receive header queue entry in words. */ - __u32 spi_rcvhdrent_size; - /* - * Count of receive header queue entries allocated. - * This may be less than the spu_rcvhdrcnt passed in!. - */ - __u32 spi_rcvhdr_cnt; - - /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */ - __u32 spi_runtime_flags; - - /* address where receive buffer queue is mapped into */ - __u64 spi_rcvhdr_base; - - /* user program. */ - - /* base address of eager TID receive buffers. */ - __u64 spi_rcv_egrbufs; - - /* Allocated by initialization code, not by protocol. */ - - /* - * Size of each TID buffer in host memory, starting at - * spi_rcv_egrbufs. The buffers are virtually contiguous. - */ - __u32 spi_rcv_egrbufsize; - /* - * The special QP (queue pair) value that identifies an infinipath - * protocol packet from standard IB packets. More, probably much - * more, to be added. - */ - __u32 spi_qpair; - - /* - * User register base for init code, not to be used directly by - * protocol or applications. - */ - __u64 __spi_uregbase; - /* - * Maximum buffer size in bytes that can be used in a single TID - * entry (assuming the buffer is aligned to this boundary). This is - * the minimum of what the hardware and software support Guaranteed - * to be a power of 2. - */ - __u32 spi_tid_maxsize; - /* - * alignment of each pio send buffer (byte count - * to add to spi_piobufbase to get to second buffer) - */ - __u32 spi_pioalign; - /* - * The index of the first pio buffer available to this process; - * needed to do lookup in spi_pioavailaddr; not added to - * spi_piobufbase. - */ - __u32 spi_pioindex; - /* number of buffers mapped for this process */ - __u32 spi_piocnt; - - /* - * Base address of writeonly pio buffers for this process. - * Each buffer has spi_piosize words, and is aligned on spi_pioalign - * boundaries. spi_piocnt buffers are mapped from this address - */ - __u64 spi_piobufbase; - - /* - * Base address of readonly memory copy of the pioavail registers. - * There are 2 bits for each buffer. - */ - __u64 spi_pioavailaddr; - - /* - * Address where driver updates a copy of the interface and driver - * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a - * string indicating hardware error, if there was one. - */ - __u64 spi_status; - - /* number of chip ports available to user processes */ - __u32 spi_nports; - /* unit number of chip we are using */ - __u32 spi_unit; - /* num bufs in each contiguous set */ - __u32 spi_rcv_egrperchunk; - /* size in bytes of each contiguous set */ - __u32 spi_rcv_egrchunksize; - /* total size of mmap to cover full rcvegrbuffers */ - __u32 spi_rcv_egrbuftotlen; - __u32 spi_filler_for_align; - /* address of readonly memory copy of the rcvhdrq tail register. */ - __u64 spi_rcvhdr_tailaddr; - - /* shared memory pages for subports if port is shared */ - __u64 spi_subport_uregbase; - __u64 spi_subport_rcvegrbuf; - __u64 spi_subport_rcvhdr_base; - - /* shared memory page for hardware port if it is shared */ - __u64 spi_port_uregbase; - __u64 spi_port_rcvegrbuf; - __u64 spi_port_rcvhdr_base; - __u64 spi_port_rcvhdr_tailaddr; - -} __attribute__ ((aligned(8))); - - -/* - * This version number is given to the driver by the user code during - * initialization in the spu_userversion field of ipath_user_info, so - * the driver can check for compatibility with user code. - * - * The major version changes when data structures - * change in an incompatible way. The driver must be the same or higher - * for initialization to succeed. In some cases, a higher version - * driver will not interoperate with older software, and initialization - * will return an error. - */ -#define IPATH_USER_SWMAJOR 1 - -/* - * Minor version differences are always compatible - * a within a major version, however if user software is larger - * than driver software, some new features and/or structure fields - * may not be implemented; the user code must deal with this if it - * cares, or it must abort after initialization reports the difference. - */ -#define IPATH_USER_SWMINOR 6 - -#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) - -#define IPATH_KERN_TYPE 0 - -/* - * Similarly, this is the kernel version going back to the user. It's - * slightly different, in that we want to tell if the driver was built as - * part of a QLogic release, or from the driver from openfabrics.org, - * kernel.org, or a standard distribution, for support reasons. - * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied. - * - * It's returned by the driver to the user code during initialization in the - * spi_sw_version field of ipath_base_info, so the user code can in turn - * check for compatibility with the kernel. -*/ -#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION) - -/* - * This structure is passed to ipath_userinit() to tell the driver where - * user code buffers are, sizes, etc. The offsets and sizes of the - * fields must remain unchanged, for binary compatibility. It can - * be extended, if userversion is changed so user code can tell, if needed - */ -struct ipath_user_info { - /* - * version of user software, to detect compatibility issues. - * Should be set to IPATH_USER_SWVERSION. - */ - __u32 spu_userversion; - - /* desired number of receive header queue entries */ - __u32 spu_rcvhdrcnt; - - /* size of struct base_info to write to */ - __u32 spu_base_info_size; - - /* - * number of words in KD protocol header - * This tells InfiniPath how many words to copy to rcvhdrq. If 0, - * kernel uses a default. Once set, attempts to set any other value - * are an error (EAGAIN) until driver is reloaded. - */ - __u32 spu_rcvhdrsize; - - /* - * If two or more processes wish to share a port, each process - * must set the spu_subport_cnt and spu_subport_id to the same - * values. The only restriction on the spu_subport_id is that - * it be unique for a given node. - */ - __u16 spu_subport_cnt; - __u16 spu_subport_id; - - __u32 spu_unused; /* kept for compatible layout */ - - /* - * address of struct base_info to write to - */ - __u64 spu_base_info; - -} __attribute__ ((aligned(8))); - -/* User commands. */ - -#define IPATH_CMD_MIN 16 - -#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */ -#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ -#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ -#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ -#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ -#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ -#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */ -#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ -#define IPATH_CMD_USER_INIT 24 /* set up userspace */ -#define IPATH_CMD_UNUSED_1 25 -#define IPATH_CMD_UNUSED_2 26 -#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ -#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ -#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ -/* 30 is unused */ -#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ -#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ - -/* - * Poll types - */ -#define IPATH_POLL_TYPE_URGENT 0x01 -#define IPATH_POLL_TYPE_OVERFLOW 0x02 - -struct ipath_port_info { - __u32 num_active; /* number of active units */ - __u32 unit; /* unit (chip) assigned to caller */ - __u16 port; /* port on unit assigned to caller */ - __u16 subport; /* subport on unit assigned to caller */ - __u16 num_ports; /* number of ports available on unit */ - __u16 num_subports; /* number of subports opened on port */ -}; - -struct ipath_tid_info { - __u32 tidcnt; - /* make structure same size in 32 and 64 bit */ - __u32 tid__unused; - /* virtual address of first page in transfer */ - __u64 tidvaddr; - /* pointer (same size 32/64 bit) to __u16 tid array */ - __u64 tidlist; - - /* - * pointer (same size 32/64 bit) to bitmap of TIDs used - * for this call; checked for being large enough at open - */ - __u64 tidmap; -}; - -struct ipath_cmd { - __u32 type; /* command type */ - union { - struct ipath_tid_info tid_info; - struct ipath_user_info user_info; - - /* - * address in userspace where we should put the sdma - * inflight counter - */ - __u64 sdma_inflight; - /* - * address in userspace where we should put the sdma - * completion counter - */ - __u64 sdma_complete; - /* address in userspace of struct ipath_port_info to - write result to */ - __u64 port_info; - /* enable/disable receipt of packets */ - __u32 recv_ctrl; - /* enable/disable armlaunch errors (non-zero to enable) */ - __u32 armlaunch_ctrl; - /* partition key to set */ - __u16 part_key; - /* user address of __u32 bitmask of active slaves */ - __u64 slave_mask_addr; - /* type of polling we want */ - __u16 poll_type; - } cmd; -}; - -struct ipath_iovec { - /* Pointer to data, but same size 32 and 64 bit */ - __u64 iov_base; - - /* - * Length of data; don't need 64 bits, but want - * ipath_sendpkt to remain same size as before 32 bit changes, so... - */ - __u64 iov_len; -}; - -/* - * Describes a single packet for send. Each packet can have one or more - * buffers, but the total length (exclusive of IB headers) must be less - * than the MTU, and if using the PIO method, entire packet length, - * including IB headers, must be less than the ipath_piosize value (words). - * Use of this necessitates including sys/uio.h - */ -struct __ipath_sendpkt { - __u32 sps_flags; /* flags for packet (TBD) */ - __u32 sps_cnt; /* number of entries to use in sps_iov */ - /* array of iov's describing packet. TEMPORARY */ - struct ipath_iovec sps_iov[4]; -}; - -/* - * diagnostics can send a packet by "writing" one of the following - * two structs to diag data special file - * The first is the legacy version for backward compatibility - */ -struct ipath_diag_pkt { - __u32 unit; - __u64 data; - __u32 len; -}; - -/* The second diag_pkt struct is the expanded version that allows - * more control over the packet, specifically, by allowing a custom - * pbc (+ static rate) qword, so that special modes and deliberate - * changes to CRCs can be used. The elements were also re-ordered - * for better alignment and to avoid padding issues. - */ -struct ipath_diag_xpkt { - __u64 data; - __u64 pbc_wd; - __u32 unit; - __u32 len; -}; - -/* - * Data layout in I2C flash (for GUID, etc.) - * All fields are little-endian binary unless otherwise stated - */ -#define IPATH_FLASH_VERSION 2 -struct ipath_flash { - /* flash layout version (IPATH_FLASH_VERSION) */ - __u8 if_fversion; - /* checksum protecting if_length bytes */ - __u8 if_csum; - /* - * valid length (in use, protected by if_csum), including - * if_fversion and if_csum themselves) - */ - __u8 if_length; - /* the GUID, in network order */ - __u8 if_guid[8]; - /* number of GUIDs to use, starting from if_guid */ - __u8 if_numguid; - /* the (last 10 characters of) board serial number, in ASCII */ - char if_serial[12]; - /* board mfg date (YYYYMMDD ASCII) */ - char if_mfgdate[8]; - /* last board rework/test date (YYYYMMDD ASCII) */ - char if_testdate[8]; - /* logging of error counts, TBD */ - __u8 if_errcntp[4]; - /* powered on hours, updated at driver unload */ - __u8 if_powerhour[2]; - /* ASCII free-form comment field */ - char if_comment[32]; - /* Backwards compatible prefix for longer QLogic Serial Numbers */ - char if_sprefix[4]; - /* 82 bytes used, min flash size is 128 bytes */ - __u8 if_future[46]; -}; - -/* - * These are the counters implemented in the chip, and are listed in order. - * The InterCaps naming is taken straight from the chip spec. - */ -struct infinipath_counters { - __u64 LBIntCnt; - __u64 LBFlowStallCnt; - __u64 TxSDmaDescCnt; /* was Reserved1 */ - __u64 TxUnsupVLErrCnt; - __u64 TxDataPktCnt; - __u64 TxFlowPktCnt; - __u64 TxDwordCnt; - __u64 TxLenErrCnt; - __u64 TxMaxMinLenErrCnt; - __u64 TxUnderrunCnt; - __u64 TxFlowStallCnt; - __u64 TxDroppedPktCnt; - __u64 RxDroppedPktCnt; - __u64 RxDataPktCnt; - __u64 RxFlowPktCnt; - __u64 RxDwordCnt; - __u64 RxLenErrCnt; - __u64 RxMaxMinLenErrCnt; - __u64 RxICRCErrCnt; - __u64 RxVCRCErrCnt; - __u64 RxFlowCtrlErrCnt; - __u64 RxBadFormatCnt; - __u64 RxLinkProblemCnt; - __u64 RxEBPCnt; - __u64 RxLPCRCErrCnt; - __u64 RxBufOvflCnt; - __u64 RxTIDFullErrCnt; - __u64 RxTIDValidErrCnt; - __u64 RxPKeyMismatchCnt; - __u64 RxP0HdrEgrOvflCnt; - __u64 RxP1HdrEgrOvflCnt; - __u64 RxP2HdrEgrOvflCnt; - __u64 RxP3HdrEgrOvflCnt; - __u64 RxP4HdrEgrOvflCnt; - __u64 RxP5HdrEgrOvflCnt; - __u64 RxP6HdrEgrOvflCnt; - __u64 RxP7HdrEgrOvflCnt; - __u64 RxP8HdrEgrOvflCnt; - __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */ - __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */ - __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 IBStatusChangeCnt; - __u64 IBLinkErrRecoveryCnt; - __u64 IBLinkDownedCnt; - __u64 IBSymbolErrCnt; - /* The following are new for IBA7220 */ - __u64 RxVL15DroppedPktCnt; - __u64 RxOtherLocalPhyErrCnt; - __u64 PcieRetryBufDiagQwordCnt; - __u64 ExcessBufferOvflCnt; - __u64 LocalLinkIntegrityErrCnt; - __u64 RxVlErrCnt; - __u64 RxDlidFltrCnt; -}; - -/* - * The next set of defines are for packet headers, and chip register - * and memory bits that are visible to and/or used by user-mode software - * The other bits that are used only by the driver or diags are in - * ipath_registers.h - */ - -/* RcvHdrFlags bits */ -#define INFINIPATH_RHF_LENGTH_MASK 0x7FF -#define INFINIPATH_RHF_LENGTH_SHIFT 0 -#define INFINIPATH_RHF_RCVTYPE_MASK 0x7 -#define INFINIPATH_RHF_RCVTYPE_SHIFT 11 -#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF -#define INFINIPATH_RHF_EGRINDEX_SHIFT 16 -#define INFINIPATH_RHF_SEQ_MASK 0xF -#define INFINIPATH_RHF_SEQ_SHIFT 0 -#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF -#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4 -#define INFINIPATH_RHF_H_ICRCERR 0x80000000 -#define INFINIPATH_RHF_H_VCRCERR 0x40000000 -#define INFINIPATH_RHF_H_PARITYERR 0x20000000 -#define INFINIPATH_RHF_H_LENERR 0x10000000 -#define INFINIPATH_RHF_H_MTUERR 0x08000000 -#define INFINIPATH_RHF_H_IHDRERR 0x04000000 -#define INFINIPATH_RHF_H_TIDERR 0x02000000 -#define INFINIPATH_RHF_H_MKERR 0x01000000 -#define INFINIPATH_RHF_H_IBERR 0x00800000 -#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000 -#define INFINIPATH_RHF_L_USE_EGR 0x80000000 -#define INFINIPATH_RHF_L_SWA 0x00008000 -#define INFINIPATH_RHF_L_SWB 0x00004000 - -/* infinipath header fields */ -#define INFINIPATH_I_VERS_MASK 0xF -#define INFINIPATH_I_VERS_SHIFT 28 -#define INFINIPATH_I_PORT_MASK 0xF -#define INFINIPATH_I_PORT_SHIFT 24 -#define INFINIPATH_I_TID_MASK 0x7FF -#define INFINIPATH_I_TID_SHIFT 13 -#define INFINIPATH_I_OFFSET_MASK 0x1FFF -#define INFINIPATH_I_OFFSET_SHIFT 0 - -/* K_PktFlags bits */ -#define INFINIPATH_KPF_INTR 0x1 -#define INFINIPATH_KPF_SUBPORT_MASK 0x3 -#define INFINIPATH_KPF_SUBPORT_SHIFT 1 - -#define INFINIPATH_MAX_SUBPORT 4 - -/* SendPIO per-buffer control */ -#define INFINIPATH_SP_TEST 0x40 -#define INFINIPATH_SP_TESTEBP 0x20 -#define INFINIPATH_SP_TRIGGER_SHIFT 15 - -/* SendPIOAvail bits */ -#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1 -#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0 - -/* infinipath header format */ -struct ipath_header { - /* - * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - - * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, - * Port 4, TID 11, offset 13. - */ - __le32 ver_port_tid_offset; - __le16 chksum; - __le16 pkt_flags; -}; - -/* infinipath user message header format. - * This structure contains the first 4 fields common to all protocols - * that employ infinipath. - */ -struct ipath_message_header { - __be16 lrh[4]; - __be32 bth[3]; - /* fields below this point are in host byte order */ - struct ipath_header iph; - __u8 sub_opcode; -}; - -/* infinipath ethernet header format */ -struct ether_header { - __be16 lrh[4]; - __be32 bth[3]; - struct ipath_header iph; - __u8 sub_opcode; - __u8 cmd; - __be16 lid; - __u16 mac[3]; - __u8 frag_num; - __u8 seq_num; - __le32 len; - /* MUST be of word size due to PIO write requirements */ - __le32 csum; - __le16 csum_offset; - __le16 flags; - __u16 first_2_bytes; - __u8 unused[2]; /* currently unused */ -}; - - -/* IB - LRH header consts */ -#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ -#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ - -/* misc. */ -#define SIZE_OF_CRC 1 - -#define IPATH_DEFAULT_P_KEY 0xFFFF -#define IPATH_PERMISSIVE_LID 0xFFFF -#define IPATH_AETH_CREDIT_SHIFT 24 -#define IPATH_AETH_CREDIT_MASK 0x1F -#define IPATH_AETH_CREDIT_INVAL 0x1F -#define IPATH_PSN_MASK 0xFFFFFF -#define IPATH_MSN_MASK 0xFFFFFF -#define IPATH_QPN_MASK 0xFFFFFF -#define IPATH_MULTICAST_LID_BASE 0xC000 -#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK -#define IPATH_MULTICAST_QPN 0xFFFFFF - -/* Receive Header Queue: receive type (from infinipath) */ -#define RCVHQ_RCV_TYPE_EXPECTED 0 -#define RCVHQ_RCV_TYPE_EAGER 1 -#define RCVHQ_RCV_TYPE_NON_KD 2 -#define RCVHQ_RCV_TYPE_ERROR 3 - - -/* sub OpCodes - ith4x */ -#define IPATH_ITH4X_OPCODE_ENCAP 0x81 -#define IPATH_ITH4X_OPCODE_LID_ARP 0x82 - -#define IPATH_HEADER_QUEUE_WORDS 9 - -/* functions for extracting fields from rcvhdrq entries for the driver. - */ -static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf) -{ - return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK; -} - -static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf) -{ - return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT) - & INFINIPATH_RHF_RCVTYPE_MASK; -} - -static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf) -{ - return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT) - & INFINIPATH_RHF_LENGTH_MASK) << 2; -} - -static inline __u32 ipath_hdrget_index(const __le32 * rbuf) -{ - return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT) - & INFINIPATH_RHF_EGRINDEX_MASK; -} - -static inline __u32 ipath_hdrget_seq(const __le32 *rbuf) -{ - return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT) - & INFINIPATH_RHF_SEQ_MASK; -} - -static inline __u32 ipath_hdrget_offset(const __le32 *rbuf) -{ - return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT) - & INFINIPATH_RHF_HDRQ_OFFSET_MASK; -} - -static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf) -{ - return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR; -} - -static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword) -{ - return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT) - & INFINIPATH_I_VERS_MASK; -} - -#endif /* _IPATH_COMMON_H */ diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_cq.c b/kernel/drivers/infiniband/hw/ipath/ipath_cq.c deleted file mode 100644 index 0416c6c0e..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_cq.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "ipath_verbs.h" - -/** - * ipath_cq_enter - add a new entry to the completion queue - * @cq: completion queue - * @entry: work completion entry to add - * @sig: true if @entry is a solicitated entry - * - * This may be called with qp->s_lock held. - */ -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) -{ - struct ipath_cq_wc *wc; - unsigned long flags; - u32 head; - u32 next; - - spin_lock_irqsave(&cq->lock, flags); - - /* - * Note that the head pointer might be writable by user processes. - * Take care to verify it is a sane value. - */ - wc = cq->queue; - head = wc->head; - if (head >= (unsigned) cq->ibcq.cqe) { - head = cq->ibcq.cqe; - next = 0; - } else - next = head + 1; - if (unlikely(next == wc->tail)) { - spin_unlock_irqrestore(&cq->lock, flags); - if (cq->ibcq.event_handler) { - struct ib_event ev; - - ev.device = cq->ibcq.device; - ev.element.cq = &cq->ibcq; - ev.event = IB_EVENT_CQ_ERR; - cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); - } - return; - } - if (cq->ip) { - wc->uqueue[head].wr_id = entry->wr_id; - wc->uqueue[head].status = entry->status; - wc->uqueue[head].opcode = entry->opcode; - wc->uqueue[head].vendor_err = entry->vendor_err; - wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; - wc->uqueue[head].qp_num = entry->qp->qp_num; - wc->uqueue[head].src_qp = entry->src_qp; - wc->uqueue[head].wc_flags = entry->wc_flags; - wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = entry->slid; - wc->uqueue[head].sl = entry->sl; - wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; - wc->uqueue[head].port_num = entry->port_num; - /* Make sure entry is written before the head index. */ - smp_wmb(); - } else - wc->kqueue[head] = *entry; - wc->head = next; - - if (cq->notify == IB_CQ_NEXT_COMP || - (cq->notify == IB_CQ_SOLICITED && solicited)) { - cq->notify = IB_CQ_NONE; - cq->triggered++; - /* - * This will cause send_complete() to be called in - * another thread. - */ - tasklet_hi_schedule(&cq->comptask); - } - - spin_unlock_irqrestore(&cq->lock, flags); - - if (entry->status != IB_WC_SUCCESS) - to_idev(cq->ibcq.device)->n_wqe_errs++; -} - -/** - * ipath_poll_cq - poll for work completion entries - * @ibcq: the completion queue to poll - * @num_entries: the maximum number of entries to return - * @entry: pointer to array where work completions are placed - * - * Returns the number of completion entries polled. - * - * This may be called from interrupt context. Also called by ib_poll_cq() - * in the generic verbs code. - */ -int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) -{ - struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *wc; - unsigned long flags; - int npolled; - u32 tail; - - /* The kernel can only poll a kernel completion queue */ - if (cq->ip) { - npolled = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&cq->lock, flags); - - wc = cq->queue; - tail = wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (tail == wc->head) - break; - /* The kernel doesn't need a RMB since it has the lock. */ - *entry = wc->kqueue[tail]; - if (tail >= cq->ibcq.cqe) - tail = 0; - else - tail++; - } - wc->tail = tail; - - spin_unlock_irqrestore(&cq->lock, flags); - -bail: - return npolled; -} - -static void send_complete(unsigned long data) -{ - struct ipath_cq *cq = (struct ipath_cq *)data; - - /* - * The completion handler will most likely rearm the notification - * and poll for all pending entries. If a new completion entry - * is added while we are in this routine, tasklet_hi_schedule() - * won't call us again until we return so we check triggered to - * see if we need to call the handler again. - */ - for (;;) { - u8 triggered = cq->triggered; - - cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); - - if (cq->triggered == triggered) - return; - } -} - -/** - * ipath_create_cq - create a completion queue - * @ibdev: the device this completion queue is attached to - * @entries: the minimum size of the completion queue - * @context: unused by the InfiniPath driver - * @udata: unused by the InfiniPath driver - * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * - * Called by ib_create_cq() in the generic verbs code. - */ -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cq *cq; - struct ipath_cq_wc *wc; - struct ib_cq *ret; - u32 sz; - - if (entries < 1 || entries > ib_ipath_max_cqes) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - /* Allocate the completion queue structure. */ - cq = kmalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - - /* - * Allocate the completion queue entries and head/tail pointers. - * This is allocated separately so that it can be resized and - * also mapped into user space. - * We need to use vmalloc() in order to support mmap and large - * numbers of entries. - */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (entries + 1); - else - sz += sizeof(struct ib_wc) * (entries + 1); - wc = vmalloc_user(sz); - if (!wc) { - ret = ERR_PTR(-ENOMEM); - goto bail_cq; - } - - /* - * Return the address of the WC as the offset to mmap. - * See ipath_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - - cq->ip = ipath_create_mmap_info(dev, sz, context, wc); - if (!cq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wc; - } - - err = ib_copy_to_udata(udata, &cq->ip->offset, - sizeof(cq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - cq->ip = NULL; - - spin_lock(&dev->n_cqs_lock); - if (dev->n_cqs_allocated == ib_ipath_max_cqs) { - spin_unlock(&dev->n_cqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_cqs_allocated++; - spin_unlock(&dev->n_cqs_lock); - - if (cq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - - /* - * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. - * The number of entries should be >= the number requested or return - * an error. - */ - cq->ibcq.cqe = entries; - cq->notify = IB_CQ_NONE; - cq->triggered = 0; - spin_lock_init(&cq->lock); - tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); - wc->head = 0; - wc->tail = 0; - cq->queue = wc; - - ret = &cq->ibcq; - - goto done; - -bail_ip: - kfree(cq->ip); -bail_wc: - vfree(wc); -bail_cq: - kfree(cq); -done: - return ret; -} - -/** - * ipath_destroy_cq - destroy a completion queue - * @ibcq: the completion queue to destroy. - * - * Returns 0 for success. - * - * Called by ib_destroy_cq() in the generic verbs code. - */ -int ipath_destroy_cq(struct ib_cq *ibcq) -{ - struct ipath_ibdev *dev = to_idev(ibcq->device); - struct ipath_cq *cq = to_icq(ibcq); - - tasklet_kill(&cq->comptask); - spin_lock(&dev->n_cqs_lock); - dev->n_cqs_allocated--; - spin_unlock(&dev->n_cqs_lock); - if (cq->ip) - kref_put(&cq->ip->ref, ipath_release_mmap_info); - else - vfree(cq->queue); - kfree(cq); - - return 0; -} - -/** - * ipath_req_notify_cq - change the notification type for a completion queue - * @ibcq: the completion queue - * @notify_flags: the type of notification to request - * - * Returns 0 for success. - * - * This may be called from interrupt context. Also called by - * ib_req_notify_cq() in the generic verbs code. - */ -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) -{ - struct ipath_cq *cq = to_icq(ibcq); - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&cq->lock, flags); - /* - * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow - * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). - */ - if (cq->notify != IB_CQ_NEXT_COMP) - cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; - - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && - cq->queue->head != cq->queue->tail) - ret = 1; - - spin_unlock_irqrestore(&cq->lock, flags); - - return ret; -} - -/** - * ipath_resize_cq - change the size of the CQ - * @ibcq: the completion queue - * - * Returns 0 for success. - */ -int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) -{ - struct ipath_cq *cq = to_icq(ibcq); - struct ipath_cq_wc *old_wc; - struct ipath_cq_wc *wc; - u32 head, tail, n; - int ret; - u32 sz; - - if (cqe < 1 || cqe > ib_ipath_max_cqes) { - ret = -EINVAL; - goto bail; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); - else - sz += sizeof(struct ib_wc) * (cqe + 1); - wc = vmalloc_user(sz); - if (!wc) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = 0; - - ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&cq->lock); - /* - * Make sure head and tail are sane since they - * might be user writable. - */ - old_wc = cq->queue; - head = old_wc->head; - if (head > (u32) cq->ibcq.cqe) - head = (u32) cq->ibcq.cqe; - tail = old_wc->tail; - if (tail > (u32) cq->ibcq.cqe) - tail = (u32) cq->ibcq.cqe; - if (head < tail) - n = cq->ibcq.cqe + 1 + head - tail; - else - n = head - tail; - if (unlikely((u32)cqe < n)) { - ret = -EINVAL; - goto bail_unlock; - } - for (n = 0; tail != head; n++) { - if (cq->ip) - wc->uqueue[n] = old_wc->uqueue[tail]; - else - wc->kqueue[n] = old_wc->kqueue[tail]; - if (tail == (u32) cq->ibcq.cqe) - tail = 0; - else - tail++; - } - cq->ibcq.cqe = cqe; - wc->head = n; - wc->tail = 0; - cq->queue = wc; - spin_unlock_irq(&cq->lock); - - vfree(old_wc); - - if (cq->ip) { - struct ipath_ibdev *dev = to_idev(ibcq->device); - struct ipath_mmap_info *ip = cq->ip; - - ipath_update_mmap_info(dev, ip, sz, wc); - - /* - * Return the offset to mmap. - * See ipath_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - spin_lock_irq(&dev->pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - - ret = 0; - goto bail; - -bail_unlock: - spin_unlock_irq(&cq->lock); -bail_free: - vfree(wc); -bail: - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_debug.h b/kernel/drivers/infiniband/hw/ipath/ipath_debug.h deleted file mode 100644 index 65926cd35..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_debug.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_DEBUG_H -#define _IPATH_DEBUG_H - -#ifndef _IPATH_DEBUGGING /* debugging enabled or not */ -#define _IPATH_DEBUGGING 1 -#endif - -#if _IPATH_DEBUGGING - -/* - * Mask values for debugging. The scheme allows us to compile out any - * of the debug tracing stuff, and if compiled in, to enable or disable - * dynamically. This can be set at modprobe time also: - * modprobe infinipath.ko infinipath_debug=7 - */ - -#define __IPATH_INFO 0x1 /* generic low verbosity stuff */ -#define __IPATH_DBG 0x2 /* generic debug */ -#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */ -/* leave some low verbosity spots open */ -#define __IPATH_VERBDBG 0x40 /* very verbose debug */ -#define __IPATH_PKTDBG 0x80 /* print packet data */ -/* print process startup (init)/exit messages */ -#define __IPATH_PROCDBG 0x100 -/* print mmap/fault stuff, not using VDBG any more */ -#define __IPATH_MMDBG 0x200 -#define __IPATH_ERRPKTDBG 0x400 -#define __IPATH_USER_SEND 0x1000 /* use user mode send */ -#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ -#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ -#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ -#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ -#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ -#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */ -#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */ -#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */ - -#else /* _IPATH_DEBUGGING */ - -/* - * define all of these even with debugging off, for the few places that do - * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the - * compiler eliminate the code - */ - -#define __IPATH_INFO 0x0 /* generic low verbosity stuff */ -#define __IPATH_DBG 0x0 /* generic debug */ -#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */ -#define __IPATH_VERBDBG 0x0 /* very verbose debug */ -#define __IPATH_PKTDBG 0x0 /* print packet data */ -#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */ -/* print mmap/fault stuff, not using VDBG any more */ -#define __IPATH_MMDBG 0x0 -#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ -#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ -#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ -#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ -#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */ -#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on */ -#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */ - -#endif /* _IPATH_DEBUGGING */ - -#define __IPATH_VERBOSEDBG __IPATH_VERBDBG - -#endif /* _IPATH_DEBUG_H */ diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_diag.c b/kernel/drivers/infiniband/hw/ipath/ipath_diag.c deleted file mode 100644 index 45802e973..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_diag.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file contains support for diagnostic functions. It is accessed by - * opening the ipath_diag device, normally minor number 129. Diagnostic use - * of the InfiniPath chip may render the chip or board unusable until the - * driver is unloaded, or in some cases, until the system is rebooted. - * - * Accesses to the chip through this interface are not similar to going - * through the /sys/bus/pci resource mmap interface. - */ - -#include <linux/io.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> -#include <linux/fs.h> -#include <linux/export.h> -#include <asm/uaccess.h> - -#include "ipath_kernel.h" -#include "ipath_common.h" - -int ipath_diag_inuse; -static int diag_set_link; - -static int ipath_diag_open(struct inode *in, struct file *fp); -static int ipath_diag_release(struct inode *in, struct file *fp); -static ssize_t ipath_diag_read(struct file *fp, char __user *data, - size_t count, loff_t *off); -static ssize_t ipath_diag_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); - -static const struct file_operations diag_file_ops = { - .owner = THIS_MODULE, - .write = ipath_diag_write, - .read = ipath_diag_read, - .open = ipath_diag_open, - .release = ipath_diag_release, - .llseek = default_llseek, -}; - -static ssize_t ipath_diagpkt_write(struct file *fp, - const char __user *data, - size_t count, loff_t *off); - -static const struct file_operations diagpkt_file_ops = { - .owner = THIS_MODULE, - .write = ipath_diagpkt_write, - .llseek = noop_llseek, -}; - -static atomic_t diagpkt_count = ATOMIC_INIT(0); -static struct cdev *diagpkt_cdev; -static struct device *diagpkt_dev; - -int ipath_diag_add(struct ipath_devdata *dd) -{ - char name[16]; - int ret = 0; - - if (atomic_inc_return(&diagpkt_count) == 1) { - ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR, - "ipath_diagpkt", &diagpkt_file_ops, - &diagpkt_cdev, &diagpkt_dev); - - if (ret) { - ipath_dev_err(dd, "Couldn't create ipath_diagpkt " - "device: %d", ret); - goto done; - } - } - - snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit); - - ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name, - &diag_file_ops, &dd->diag_cdev, - &dd->diag_dev); - if (ret) - ipath_dev_err(dd, "Couldn't create %s device: %d", - name, ret); - -done: - return ret; -} - -void ipath_diag_remove(struct ipath_devdata *dd) -{ - if (atomic_dec_and_test(&diagpkt_count)) - ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev); - - ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev); -} - -/** - * ipath_read_umem64 - read a 64-bit quantity from the chip into user space - * @dd: the infinipath device - * @uaddr: the location to store the data in user memory - * @caddr: the source chip address (full pointer, not offset) - * @count: number of bytes to copy (multiple of 32 bits) - * - * This function also localizes all chip memory accesses. - * The copy should be written such that we read full cacheline packets - * from the chip. This is usually used for a single qword - * - * NOTE: This assumes the chip address is 64-bit aligned. - */ -static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr, - const void __iomem *caddr, size_t count) -{ - const u64 __iomem *reg_addr = caddr; - const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64)); - int ret; - - /* not very efficient, but it works for now */ - if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) { - ret = -EINVAL; - goto bail; - } - while (reg_addr < reg_end) { - u64 data = readq(reg_addr); - if (copy_to_user(uaddr, &data, sizeof(u64))) { - ret = -EFAULT; - goto bail; - } - reg_addr++; - uaddr += sizeof(u64); - } - ret = 0; -bail: - return ret; -} - -/** - * ipath_write_umem64 - write a 64-bit quantity to the chip from user space - * @dd: the infinipath device - * @caddr: the destination chip address (full pointer, not offset) - * @uaddr: the source of the data in user memory - * @count: the number of bytes to copy (multiple of 32 bits) - * - * This is usually used for a single qword - * NOTE: This assumes the chip address is 64-bit aligned. - */ - -static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr, - const void __user *uaddr, size_t count) -{ - u64 __iomem *reg_addr = caddr; - const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64)); - int ret; - - /* not very efficient, but it works for now */ - if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) { - ret = -EINVAL; - goto bail; - } - while (reg_addr < reg_end) { - u64 data; - if (copy_from_user(&data, uaddr, sizeof(data))) { - ret = -EFAULT; - goto bail; - } - writeq(data, reg_addr); - - reg_addr++; - uaddr += sizeof(u64); - } - ret = 0; -bail: - return ret; -} - -/** - * ipath_read_umem32 - read a 32-bit quantity from the chip into user space - * @dd: the infinipath device - * @uaddr: the location to store the data in user memory - * @caddr: the source chip address (full pointer, not offset) - * @count: number of bytes to copy - * - * read 32 bit values, not 64 bit; for memories that only - * support 32 bit reads; usually a single dword. - */ -static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr, - const void __iomem *caddr, size_t count) -{ - const u32 __iomem *reg_addr = caddr; - const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); - int ret; - - if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || - reg_end > (u32 __iomem *) dd->ipath_kregend) { - ret = -EINVAL; - goto bail; - } - /* not very efficient, but it works for now */ - while (reg_addr < reg_end) { - u32 data = readl(reg_addr); - if (copy_to_user(uaddr, &data, sizeof(data))) { - ret = -EFAULT; - goto bail; - } - - reg_addr++; - uaddr += sizeof(u32); - - } - ret = 0; -bail: - return ret; -} - -/** - * ipath_write_umem32 - write a 32-bit quantity to the chip from user space - * @dd: the infinipath device - * @caddr: the destination chip address (full pointer, not offset) - * @uaddr: the source of the data in user memory - * @count: number of bytes to copy - * - * write 32 bit values, not 64 bit; for memories that only - * support 32 bit write; usually a single dword. - */ - -static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr, - const void __user *uaddr, size_t count) -{ - u32 __iomem *reg_addr = caddr; - const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); - int ret; - - if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || - reg_end > (u32 __iomem *) dd->ipath_kregend) { - ret = -EINVAL; - goto bail; - } - while (reg_addr < reg_end) { - u32 data; - if (copy_from_user(&data, uaddr, sizeof(data))) { - ret = -EFAULT; - goto bail; - } - writel(data, reg_addr); - - reg_addr++; - uaddr += sizeof(u32); - } - ret = 0; -bail: - return ret; -} - -static int ipath_diag_open(struct inode *in, struct file *fp) -{ - int unit = iminor(in) - IPATH_DIAG_MINOR_BASE; - struct ipath_devdata *dd; - int ret; - - mutex_lock(&ipath_mutex); - - if (ipath_diag_inuse) { - ret = -EBUSY; - goto bail; - } - - dd = ipath_lookup(unit); - - if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) || - !dd->ipath_kregbase) { - ret = -ENODEV; - goto bail; - } - - fp->private_data = dd; - ipath_diag_inuse = -2; - diag_set_link = 0; - ret = 0; - - /* Only expose a way to reset the device if we - make it into diag mode. */ - ipath_expose_reset(&dd->pcidev->dev); - -bail: - mutex_unlock(&ipath_mutex); - - return ret; -} - -/** - * ipath_diagpkt_write - write an IB packet - * @fp: the diag data device file pointer - * @data: ipath_diag_pkt structure saying where to get the packet - * @count: size of data to write - * @off: unused by this code - */ -static ssize_t ipath_diagpkt_write(struct file *fp, - const char __user *data, - size_t count, loff_t *off) -{ - u32 __iomem *piobuf; - u32 plen, pbufn, maxlen_reserve; - struct ipath_diag_pkt odp; - struct ipath_diag_xpkt dp; - u32 *tmpbuf = NULL; - struct ipath_devdata *dd; - ssize_t ret = 0; - u64 val; - u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - - - if (count == sizeof(dp)) { - if (copy_from_user(&dp, data, sizeof(dp))) { - ret = -EFAULT; - goto bail; - } - } else if (count == sizeof(odp)) { - if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; - goto bail; - } - dp.len = odp.len; - dp.unit = odp.unit; - dp.data = odp.data; - dp.pbc_wd = 0; - } else { - ret = -EINVAL; - goto bail; - } - - /* send count must be an exact number of dwords */ - if (dp.len & 3) { - ret = -EINVAL; - goto bail; - } - - plen = dp.len >> 2; - - dd = ipath_lookup(dp.unit); - if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || - !dd->ipath_kregbase) { - ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n", - dp.unit); - ret = -ENODEV; - goto bail; - } - - if (ipath_diag_inuse && !diag_set_link && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - diag_set_link = 1; - ipath_cdbg(VERBOSE, "Trying to set to set link active for " - "diag pkt\n"); - ipath_set_linkstate(dd, IPATH_IB_LINKARM); - ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); - } - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit); - ret = -ENODEV; - goto bail; - } - /* - * Want to skip check for l_state if using custom PBC, - * because we might be trying to force an SM packet out. - * first-cut, skip _all_ state checking in that case. - */ - val = ipath_ib_state(dd, dd->ipath_lastibcstat); - lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); - l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); - if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP || - (val != dd->ib_init && val != dd->ib_arm && - val != dd->ib_active))) { - ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", - dd->ipath_unit, (unsigned long long) val); - ret = -EINVAL; - goto bail; - } - - /* - * need total length before first word written, plus 2 Dwords. One Dword - * is for padding so we get the full user data when not aligned on - * a word boundary. The other Dword is to make sure we have room for the - * ICRC which gets tacked on later. - */ - maxlen_reserve = 2 * sizeof(u32); - if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { - ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - dp.len, dd->ipath_ibmaxlen); - ret = -EINVAL; - goto bail; - } - - plen = sizeof(u32) + dp.len; - - tmpbuf = vmalloc(plen); - if (!tmpbuf) { - dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " - "failing\n"); - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmpbuf, - (const void __user *) (unsigned long) dp.data, - dp.len)) { - ret = -EFAULT; - goto bail; - } - - plen >>= 2; /* in dwords */ - - piobuf = ipath_getpiobuf(dd, plen, &pbufn); - if (!piobuf) { - ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", - dd->ipath_unit); - ret = -EBUSY; - goto bail; - } - /* disarm it just to be extra sure */ - ipath_disarm_piobufs(dd, pbufn, 1); - - if (ipath_debug & __IPATH_PKTDBG) - ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", - dd->ipath_unit, plen - 1, pbufn); - - if (dp.pbc_wd == 0) - dp.pbc_wd = plen; - writeq(dp.pbc_wd, piobuf); - /* - * Copy all by the trigger word, then flush, so it's written - * to chip before trigger word, then write trigger word, then - * flush again, so packet is sent. - */ - if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { - ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); - ipath_flush_wc(); - __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); - } else - __iowrite32_copy(piobuf + 2, tmpbuf, plen); - - ipath_flush_wc(); - - ret = sizeof(dp); - -bail: - vfree(tmpbuf); - return ret; -} - -static int ipath_diag_release(struct inode *in, struct file *fp) -{ - mutex_lock(&ipath_mutex); - ipath_diag_inuse = 0; - fp->private_data = NULL; - mutex_unlock(&ipath_mutex); - return 0; -} - -static ssize_t ipath_diag_read(struct file *fp, char __user *data, - size_t count, loff_t *off) -{ - struct ipath_devdata *dd = fp->private_data; - void __iomem *kreg_base; - ssize_t ret; - - kreg_base = dd->ipath_kregbase; - - if (count == 0) - ret = 0; - else if ((count % 4) || (*off % 4)) - /* address or length is not 32-bit aligned, hence invalid */ - ret = -EINVAL; - else if (ipath_diag_inuse < 1 && (*off || count != 8)) - ret = -EINVAL; /* prevent cat /dev/ipath_diag* */ - else if ((count % 8) || (*off % 8)) - /* address or length not 64-bit aligned; do 32-bit reads */ - ret = ipath_read_umem32(dd, data, kreg_base + *off, count); - else - ret = ipath_read_umem64(dd, data, kreg_base + *off, count); - - if (ret >= 0) { - *off += count; - ret = count; - if (ipath_diag_inuse == -2) - ipath_diag_inuse++; - } - - return ret; -} - -static ssize_t ipath_diag_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct ipath_devdata *dd = fp->private_data; - void __iomem *kreg_base; - ssize_t ret; - - kreg_base = dd->ipath_kregbase; - - if (count == 0) - ret = 0; - else if ((count % 4) || (*off % 4)) - /* address or length is not 32-bit aligned, hence invalid */ - ret = -EINVAL; - else if ((ipath_diag_inuse == -1 && (*off || count != 8)) || - ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */ - ret = -EINVAL; /* before any other write allowed */ - else if ((count % 8) || (*off % 8)) - /* address or length not 64-bit aligned; do 32-bit writes */ - ret = ipath_write_umem32(dd, kreg_base + *off, data, count); - else - ret = ipath_write_umem64(dd, kreg_base + *off, data, count); - - if (ret >= 0) { - *off += count; - ret = count; - if (ipath_diag_inuse == -1) - ipath_diag_inuse = 1; /* all read/write OK now */ - } - - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_dma.c b/kernel/drivers/infiniband/hw/ipath/ipath_dma.c deleted file mode 100644 index 123a8c053..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_dma.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2006 QLogic, Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/scatterlist.h> -#include <linux/gfp.h> -#include <rdma/ib_verbs.h> - -#include "ipath_verbs.h" - -#define BAD_DMA_ADDRESS ((u64) 0) - -/* - * The following functions implement driver specific replacements - * for the ib_dma_*() functions. - * - * These functions return kernel virtual addresses instead of - * device bus addresses since the driver uses the CPU to copy - * data instead of using hardware DMA. - */ - -static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return dma_addr == BAD_DMA_ADDRESS; -} - -static u64 ipath_dma_map_single(struct ib_device *dev, - void *cpu_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); - return (u64) cpu_addr; -} - -static void ipath_dma_unmap_single(struct ib_device *dev, - u64 addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static u64 ipath_dma_map_page(struct ib_device *dev, - struct page *page, - unsigned long offset, - size_t size, - enum dma_data_direction direction) -{ - u64 addr; - - BUG_ON(!valid_dma_direction(direction)); - - if (offset + size > PAGE_SIZE) { - addr = BAD_DMA_ADDRESS; - goto done; - } - - addr = (u64) page_address(page); - if (addr) - addr += offset; - /* TODO: handle highmem pages */ - -done: - return addr; -} - -static void ipath_dma_unmap_page(struct ib_device *dev, - u64 addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) -{ - struct scatterlist *sg; - u64 addr; - int i; - int ret = nents; - - BUG_ON(!valid_dma_direction(direction)); - - for_each_sg(sgl, sg, nents, i) { - addr = (u64) page_address(sg_page(sg)); - /* TODO: handle highmem pages */ - if (!addr) { - ret = 0; - break; - } - sg->dma_address = addr + sg->offset; -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->dma_length = sg->length; -#endif - } - return ret; -} - -static void ipath_unmap_sg(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static void ipath_sync_single_for_cpu(struct ib_device *dev, - u64 addr, - size_t size, - enum dma_data_direction dir) -{ -} - -static void ipath_sync_single_for_device(struct ib_device *dev, - u64 addr, - size_t size, - enum dma_data_direction dir) -{ -} - -static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size, - u64 *dma_handle, gfp_t flag) -{ - struct page *p; - void *addr = NULL; - - p = alloc_pages(flag, get_order(size)); - if (p) - addr = page_address(p); - if (dma_handle) - *dma_handle = (u64) addr; - return addr; -} - -static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, u64 dma_handle) -{ - free_pages((unsigned long) cpu_addr, get_order(size)); -} - -struct ib_dma_mapping_ops ipath_dma_mapping_ops = { - .mapping_error = ipath_mapping_error, - .map_single = ipath_dma_map_single, - .unmap_single = ipath_dma_unmap_single, - .map_page = ipath_dma_map_page, - .unmap_page = ipath_dma_unmap_page, - .map_sg = ipath_map_sg, - .unmap_sg = ipath_unmap_sg, - .sync_single_for_cpu = ipath_sync_single_for_cpu, - .sync_single_for_device = ipath_sync_single_for_device, - .alloc_coherent = ipath_dma_alloc_coherent, - .free_coherent = ipath_dma_free_coherent -}; diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_driver.c b/kernel/drivers/infiniband/hw/ipath/ipath_driver.c deleted file mode 100644 index bd0caedaf..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_driver.c +++ /dev/null @@ -1,2779 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/idr.h> -#include <linux/pci.h> -#include <linux/io.h> -#include <linux/delay.h> -#include <linux/netdevice.h> -#include <linux/vmalloc.h> -#include <linux/bitmap.h> -#include <linux/slab.h> -#include <linux/module.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" - -static void ipath_update_pio_bufs(struct ipath_devdata *); - -const char *ipath_get_unit_name(int unit) -{ - static char iname[16]; - snprintf(iname, sizeof iname, "infinipath%u", unit); - return iname; -} - -#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " -#define PFX IPATH_DRV_NAME ": " - -/* - * The size has to be longer than this string, so we can append - * board/chip information to it in the init code. - */ -const char ib_ipath_version[] = IPATH_IDSTR "\n"; - -static struct idr unit_table; -DEFINE_SPINLOCK(ipath_devs_lock); -LIST_HEAD(ipath_dev_list); - -wait_queue_head_t ipath_state_wait; - -unsigned ipath_debug = __IPATH_INFO; - -module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "mask for debug prints"); -EXPORT_SYMBOL_GPL(ipath_debug); - -unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ -module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); -MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); - -static unsigned ipath_hol_timeout_ms = 13000; -module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); -MODULE_PARM_DESC(hol_timeout_ms, - "duration of user app suspension after link failure"); - -unsigned ipath_linkrecovery = 1; -module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("QLogic <support@qlogic.com>"); -MODULE_DESCRIPTION("QLogic InfiniPath driver"); - -/* - * Table to translate the LINKTRAININGSTATE portion of - * IBCStatus to a human-readable form. - */ -const char *ipath_ibcstatus_str[] = { - "Disabled", - "LinkUp", - "PollActive", - "PollQuiet", - "SleepDelay", - "SleepQuiet", - "LState6", /* unused */ - "LState7", /* unused */ - "CfgDebounce", - "CfgRcvfCfg", - "CfgWaitRmt", - "CfgIdle", - "RecovRetrain", - "CfgTxRevLane", /* unused before IBA7220 */ - "RecovWaitRmt", - "RecovIdle", - /* below were added for IBA7220 */ - "CfgEnhanced", - "CfgTest", - "CfgWaitRmtTest", - "CfgWaitCfgEnhanced", - "SendTS_T", - "SendTstIdles", - "RcvTS_T", - "SendTst_TS1s", - "LTState18", "LTState19", "LTState1A", "LTState1B", - "LTState1C", "LTState1D", "LTState1E", "LTState1F" -}; - -static void ipath_remove_one(struct pci_dev *); -static int ipath_init_one(struct pci_dev *, const struct pci_device_id *); - -/* Only needed for registration, nothing else needs this info */ -#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 -#define PCI_DEVICE_ID_INFINIPATH_HT 0xd - -/* Number of seconds before our card status check... */ -#define STATUS_TIMEOUT 60 - -static const struct pci_device_id ipath_pci_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); - -static struct pci_driver ipath_driver = { - .name = IPATH_DRV_NAME, - .probe = ipath_init_one, - .remove = ipath_remove_one, - .id_table = ipath_pci_tbl, - .driver = { - .groups = ipath_driver_attr_groups, - }, -}; - -static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, - u32 *bar0, u32 *bar1) -{ - int ret; - - ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0); - if (ret) - ipath_dev_err(dd, "failed to read bar0 before enable: " - "error %d\n", -ret); - - ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1); - if (ret) - ipath_dev_err(dd, "failed to read bar1 before enable: " - "error %d\n", -ret); - - ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1); -} - -static void ipath_free_devdata(struct pci_dev *pdev, - struct ipath_devdata *dd) -{ - unsigned long flags; - - pci_set_drvdata(pdev, NULL); - - if (dd->ipath_unit != -1) { - spin_lock_irqsave(&ipath_devs_lock, flags); - idr_remove(&unit_table, dd->ipath_unit); - list_del(&dd->ipath_list); - spin_unlock_irqrestore(&ipath_devs_lock, flags); - } - vfree(dd); -} - -static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) -{ - unsigned long flags; - struct ipath_devdata *dd; - int ret; - - dd = vzalloc(sizeof(*dd)); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd->ipath_unit = -1; - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&ipath_devs_lock, flags); - - ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME - ": Could not allocate unit ID: error %d\n", -ret); - ipath_free_devdata(pdev, dd); - dd = ERR_PTR(ret); - goto bail_unlock; - } - dd->ipath_unit = ret; - - dd->pcidev = pdev; - pci_set_drvdata(pdev, dd); - - list_add(&dd->ipath_list, &ipath_dev_list); - -bail_unlock: - spin_unlock_irqrestore(&ipath_devs_lock, flags); - idr_preload_end(); -bail: - return dd; -} - -static inline struct ipath_devdata *__ipath_lookup(int unit) -{ - return idr_find(&unit_table, unit); -} - -struct ipath_devdata *ipath_lookup(int unit) -{ - struct ipath_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&ipath_devs_lock, flags); - dd = __ipath_lookup(unit); - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - return dd; -} - -int ipath_count_units(int *npresentp, int *nupp, int *maxportsp) -{ - int nunits, npresent, nup; - struct ipath_devdata *dd; - unsigned long flags; - int maxports; - - nunits = npresent = nup = maxports = 0; - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry(dd, &ipath_dev_list, ipath_list) { - nunits++; - if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase) - npresent++; - if (dd->ipath_lid && - !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN - | IPATH_LINKUNK))) - nup++; - if (dd->ipath_cfgports > maxports) - maxports = dd->ipath_cfgports; - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - if (npresentp) - *npresentp = npresent; - if (nupp) - *nupp = nup; - if (maxportsp) - *maxportsp = maxports; - - return nunits; -} - -/* - * These next two routines are placeholders in case we don't have per-arch - * code for controlling write combining. If explicit control of write - * combining is not available, performance will probably be awful. - */ - -int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd) -{ - return -EOPNOTSUPP; -} - -void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd) -{ -} - -/* - * Perform a PIO buffer bandwidth write test, to verify proper system - * configuration. Even when all the setup calls work, occasionally - * BIOS or other issues can prevent write combining from working, or - * can cause other bandwidth problems to the chip. - * - * This test simply writes the same buffer over and over again, and - * measures close to the peak bandwidth to the chip (not testing - * data bandwidth to the wire). On chips that use an address-based - * trigger to send packets to the wire, this is easy. On chips that - * use a count to trigger, we want to make sure that the packet doesn't - * go out on the wire, or trigger flow control checks. - */ -static void ipath_verify_pioperf(struct ipath_devdata *dd) -{ - u32 pbnum, cnt, lcnt; - u32 __iomem *piobuf; - u32 *addr; - u64 msecs, emsecs; - - piobuf = ipath_getpiobuf(dd, 0, &pbnum); - if (!piobuf) { - dev_info(&dd->pcidev->dev, - "No PIObufs for checking perf, skipping\n"); - return; - } - - /* - * Enough to give us a reasonable test, less than piobuf size, and - * likely multiple of store buffer length. - */ - cnt = 1024; - - addr = vmalloc(cnt); - if (!addr) { - dev_info(&dd->pcidev->dev, - "Couldn't get memory for checking PIO perf," - " skipping\n"); - goto done; - } - - preempt_disable(); /* we want reasonably accurate elapsed time */ - msecs = 1 + jiffies_to_msecs(jiffies); - for (lcnt = 0; lcnt < 10000U; lcnt++) { - /* wait until we cross msec boundary */ - if (jiffies_to_msecs(jiffies) >= msecs) - break; - udelay(1); - } - - ipath_disable_armlaunch(dd); - - /* - * length 0, no dwords actually sent, and mark as VL15 - * on chips where that may matter (due to IB flowcontrol) - */ - if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) - writeq(1UL << 63, piobuf); - else - writeq(0, piobuf); - ipath_flush_wc(); - - /* - * this is only roughly accurate, since even with preempt we - * still take interrupts that could take a while. Running for - * >= 5 msec seems to get us "close enough" to accurate values - */ - msecs = jiffies_to_msecs(jiffies); - for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { - __iowrite32_copy(piobuf + 64, addr, cnt >> 2); - emsecs = jiffies_to_msecs(jiffies) - msecs; - } - - /* 1 GiB/sec, slightly over IB SDR line rate */ - if (lcnt < (emsecs * 1024U)) - ipath_dev_err(dd, - "Performance problem: bandwidth to PIO buffers is " - "only %u MiB/sec\n", - lcnt / (u32) emsecs); - else - ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n", - lcnt / (u32) emsecs); - - preempt_enable(); - - vfree(addr); - -done: - /* disarm piobuf, so it's available again */ - ipath_disarm_piobufs(dd, pbnum, 1); - ipath_enable_armlaunch(dd); -} - -static void cleanup_device(struct ipath_devdata *dd); - -static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - int ret, len, j; - struct ipath_devdata *dd; - unsigned long long addr; - u32 bar0 = 0, bar1 = 0; - - dd = ipath_alloc_devdata(pdev); - if (IS_ERR(dd)) { - ret = PTR_ERR(dd); - printk(KERN_ERR IPATH_DRV_NAME - ": Could not allocate devdata: error %d\n", -ret); - goto bail; - } - - ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); - - ret = pci_enable_device(pdev); - if (ret) { - /* This can happen iff: - * - * We did a chip reset, and then failed to reprogram the - * BAR, or the chip reset due to an internal error. We then - * unloaded the driver and reloaded it. - * - * Both reset cases set the BAR back to initial state. For - * the latter case, the AER sticky error bit at offset 0x718 - * should be set, but the Linux kernel doesn't yet know - * about that, it appears. If the original BAR was retained - * in the kernel data structures, this may be OK. - */ - ipath_dev_err(dd, "enable unit %d failed: error %d\n", - dd->ipath_unit, -ret); - goto bail_devdata; - } - addr = pci_resource_start(pdev, 0); - len = pci_resource_len(pdev, 0); - ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x " - "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, - ent->device, ent->driver_data); - - read_bars(dd, pdev, &bar0, &bar1); - - if (!bar1 && !(bar0 & ~0xf)) { - if (addr) { - dev_info(&pdev->dev, "BAR is 0 (probable RESET), " - "rewriting as %llx\n", addr); - ret = pci_write_config_dword( - pdev, PCI_BASE_ADDRESS_0, addr); - if (ret) { - ipath_dev_err(dd, "rewrite of BAR0 " - "failed: err %d\n", -ret); - goto bail_disable; - } - ret = pci_write_config_dword( - pdev, PCI_BASE_ADDRESS_1, addr >> 32); - if (ret) { - ipath_dev_err(dd, "rewrite of BAR1 " - "failed: err %d\n", -ret); - goto bail_disable; - } - } else { - ipath_dev_err(dd, "BAR is 0 (probable RESET), " - "not usable until reboot\n"); - ret = -ENODEV; - goto bail_disable; - } - } - - ret = pci_request_regions(pdev, IPATH_DRV_NAME); - if (ret) { - dev_info(&pdev->dev, "pci_request_regions unit %u fails: " - "err %d\n", dd->ipath_unit, -ret); - goto bail_disable; - } - - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - /* - * if the 64 bit setup fails, try 32 bit. Some systems - * do not setup 64 bit maps on systems with 2GB or less - * memory installed. - */ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (ret) { - dev_info(&pdev->dev, - "Unable to set DMA mask for unit %u: %d\n", - dd->ipath_unit, ret); - goto bail_regions; - } - else { - ipath_dbg("No 64bit DMA mask, used 32 bit mask\n"); - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (ret) - dev_info(&pdev->dev, - "Unable to set DMA consistent mask " - "for unit %u: %d\n", - dd->ipath_unit, ret); - - } - } - else { - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) - dev_info(&pdev->dev, - "Unable to set DMA consistent mask " - "for unit %u: %d\n", - dd->ipath_unit, ret); - } - - pci_set_master(pdev); - - /* - * Save BARs to rewrite after device reset. Save all 64 bits of - * BAR, just in case. - */ - dd->ipath_pcibar0 = addr; - dd->ipath_pcibar1 = addr >> 32; - dd->ipath_deviceid = ent->device; /* save for later use */ - dd->ipath_vendorid = ent->vendor; - - /* setup the chip-specific functions, as early as possible. */ - switch (ent->device) { - case PCI_DEVICE_ID_INFINIPATH_HT: - ipath_init_iba6110_funcs(dd); - break; - - default: - ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " - "failing\n", ent->device); - return -ENODEV; - } - - for (j = 0; j < 6; j++) { - if (!pdev->resource[j].start) - continue; - ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n", - j, &pdev->resource[j], - (unsigned long long)pci_resource_len(pdev, j)); - } - - if (!addr) { - ipath_dev_err(dd, "No valid address in BAR 0!\n"); - ret = -ENODEV; - goto bail_regions; - } - - dd->ipath_pcirev = pdev->revision; - -#if defined(__powerpc__) - /* There isn't a generic way to specify writethrough mappings */ - dd->ipath_kregbase = __ioremap(addr, len, - (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); -#else - dd->ipath_kregbase = ioremap_nocache(addr, len); -#endif - - if (!dd->ipath_kregbase) { - ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", - addr); - ret = -ENOMEM; - goto bail_iounmap; - } - dd->ipath_kregend = (u64 __iomem *) - ((void __iomem *)dd->ipath_kregbase + len); - dd->ipath_physaddr = addr; /* used for io_remap, etc. */ - /* for user mmap */ - ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", - addr, dd->ipath_kregbase); - - if (dd->ipath_f_bus(dd, pdev)) - ipath_dev_err(dd, "Failed to setup config space; " - "continuing anyway\n"); - - /* - * set up our interrupt handler; IRQF_SHARED probably not needed, - * since MSI interrupts shouldn't be shared but won't hurt for now. - * check 0 irq after we return from chip-specific bus setup, since - * that can affect this due to setup - */ - if (!dd->ipath_irq) - ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); - else { - ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, - IPATH_DRV_NAME, dd); - if (ret) { - ipath_dev_err(dd, "Couldn't setup irq handler, " - "irq=%d: %d\n", dd->ipath_irq, ret); - goto bail_iounmap; - } - } - - ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ - if (ret) - goto bail_irqsetup; - - ret = ipath_enable_wc(dd); - - if (ret) { - ipath_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); - ret = 0; - } - - ipath_verify_pioperf(dd); - - ipath_device_create_group(&pdev->dev, dd); - ipathfs_add_device(dd); - ipath_user_add(dd); - ipath_diag_add(dd); - ipath_register_ib_device(dd); - - goto bail; - -bail_irqsetup: - cleanup_device(dd); - - if (dd->ipath_irq) - dd->ipath_f_free_irq(dd); - - if (dd->ipath_f_cleanup) - dd->ipath_f_cleanup(dd); - -bail_iounmap: - iounmap((volatile void __iomem *) dd->ipath_kregbase); - -bail_regions: - pci_release_regions(pdev); - -bail_disable: - pci_disable_device(pdev); - -bail_devdata: - ipath_free_devdata(pdev, dd); - -bail: - return ret; -} - -static void cleanup_device(struct ipath_devdata *dd) -{ - int port; - struct ipath_portdata **tmp; - unsigned long flags; - - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { - /* can't do anything more with chip; needs re-init */ - *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; - if (dd->ipath_kregbase) { - /* - * if we haven't already cleaned up before these are - * to ensure any register reads/writes "fail" until - * re-init - */ - dd->ipath_kregbase = NULL; - dd->ipath_uregbase = 0; - dd->ipath_sregbase = 0; - dd->ipath_cregbase = 0; - dd->ipath_kregsize = 0; - } - ipath_disable_wc(dd); - } - - if (dd->ipath_spectriggerhit) - dev_info(&dd->pcidev->dev, "%lu special trigger hits\n", - dd->ipath_spectriggerhit); - - if (dd->ipath_pioavailregs_dma) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *) dd->ipath_pioavailregs_dma, - dd->ipath_pioavailregs_phys); - dd->ipath_pioavailregs_dma = NULL; - } - if (dd->ipath_dummy_hdrq) { - dma_free_coherent(&dd->pcidev->dev, - dd->ipath_pd[0]->port_rcvhdrq_size, - dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); - dd->ipath_dummy_hdrq = NULL; - } - - if (dd->ipath_pageshadow) { - struct page **tmpp = dd->ipath_pageshadow; - dma_addr_t *tmpd = dd->ipath_physshadow; - int i, cnt = 0; - - ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " - "locked\n"); - for (port = 0; port < dd->ipath_cfgports; port++) { - int port_tidbase = port * dd->ipath_rcvtidcnt; - int maxtid = port_tidbase + dd->ipath_rcvtidcnt; - for (i = port_tidbase; i < maxtid; i++) { - if (!tmpp[i]) - continue; - pci_unmap_page(dd->pcidev, tmpd[i], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ipath_release_user_pages(&tmpp[i], 1); - tmpp[i] = NULL; - cnt++; - } - } - if (cnt) { - ipath_stats.sps_pageunlocks += cnt; - ipath_cdbg(VERBOSE, "There were still %u expTID " - "entries locked\n", cnt); - } - if (ipath_stats.sps_pagelocks || - ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu " - "unlocked via ipath_m{un}lock\n", - (unsigned long long) - ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); - - ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", - dd->ipath_pageshadow); - tmpp = dd->ipath_pageshadow; - dd->ipath_pageshadow = NULL; - vfree(tmpp); - - dd->ipath_egrtidbase = NULL; - } - - /* - * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, because that's what we allocate. - * We acquire lock to be really paranoid that ipath_pd isn't being - * accessed from some interrupt-related code (that should not happen, - * but best to be sure). - */ - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - tmp = dd->ipath_pd; - dd->ipath_pd = NULL; - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = tmp[port]; - tmp[port] = NULL; /* debugging paranoia */ - ipath_free_pddata(dd, pd); - } - kfree(tmp); -} - -static void ipath_remove_one(struct pci_dev *pdev) -{ - struct ipath_devdata *dd = pci_get_drvdata(pdev); - - ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); - - /* - * disable the IB link early, to be sure no new packets arrive, which - * complicates the shutdown process - */ - ipath_shutdown_device(dd); - - flush_workqueue(ib_wq); - - if (dd->verbs_dev) - ipath_unregister_ib_device(dd->verbs_dev); - - ipath_diag_remove(dd); - ipath_user_remove(dd); - ipathfs_remove_device(dd); - ipath_device_remove_group(&pdev->dev, dd); - - ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " - "unit %u\n", dd, (u32) dd->ipath_unit); - - cleanup_device(dd); - - /* - * turn off rcv, send, and interrupts for all ports, all drivers - * should also hard reset the chip here? - * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs - * for all versions of the driver, if they were allocated - */ - if (dd->ipath_irq) { - ipath_cdbg(VERBOSE, "unit %u free irq %d\n", - dd->ipath_unit, dd->ipath_irq); - dd->ipath_f_free_irq(dd); - } else - ipath_dbg("irq is 0, not doing free_irq " - "for unit %u\n", dd->ipath_unit); - /* - * we check for NULL here, because it's outside - * the kregbase check, and we need to call it - * after the free_irq. Thus it's possible that - * the function pointers were never initialized. - */ - if (dd->ipath_f_cleanup) - /* clean up chip-specific stuff */ - dd->ipath_f_cleanup(dd); - - ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); - iounmap((volatile void __iomem *) dd->ipath_kregbase); - pci_release_regions(pdev); - ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); - pci_disable_device(pdev); - - ipath_free_devdata(pdev, dd); -} - -/* general driver use */ -DEFINE_MUTEX(ipath_mutex); - -static DEFINE_SPINLOCK(ipath_pioavail_lock); - -/** - * ipath_disarm_piobufs - cancel a range of PIO buffers - * @dd: the infinipath device - * @first: the first PIO buffer to cancel - * @cnt: the number of PIO buffers to cancel - * - * cancel a range of PIO buffers, used when they might be armed, but - * not triggered. Used at init to ensure buffer state, and also user - * process close, in case it died while writing to a PIO buffer - * Also after errors. - */ -void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, - unsigned cnt) -{ - unsigned i, last = first + cnt; - unsigned long flags; - - ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); - for (i = first; i < last; i++) { - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - /* - * The disarm-related bits are write-only, so it - * is ok to OR them in with our copy of sendctrl - * while we hold the lock. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl | INFINIPATH_S_DISARM | - (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); - /* can't disarm bufs back-to-back per iba7220 spec */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } - /* on some older chips, update may not happen after cancel */ - ipath_force_pio_avail_update(dd); -} - -/** - * ipath_wait_linkstate - wait for an IB link state change to occur - * @dd: the infinipath device - * @state: the state to wait for - * @msecs: the number of milliseconds to wait - * - * wait up to msecs milliseconds for IB link state change to occur for - * now, take the easy polling route. Currently used only by - * ipath_set_linkstate. Returns 0 if state reached, otherwise - * -ETIMEDOUT state can have multiple states set, for any of several - * transitions. - */ -int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) -{ - dd->ipath_state_wanted = state; - wait_event_interruptible_timeout(ipath_state_wait, - (dd->ipath_flags & state), - msecs_to_jiffies(msecs)); - dd->ipath_state_wanted = 0; - - if (!(dd->ipath_flags & state)) { - u64 val; - ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" - " ms\n", - /* test INIT ahead of DOWN, both can be set */ - (state & IPATH_LINKINIT) ? "INIT" : - ((state & IPATH_LINKDOWN) ? "DOWN" : - ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")), - msecs); - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n", - (unsigned long long) ipath_read_kreg64( - dd, dd->ipath_kregs->kr_ibcctrl), - (unsigned long long) val, - ipath_ibcstatus_str[val & dd->ibcs_lts_mask]); - } - return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; -} - -static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, - char *buf, size_t blen) -{ - static const struct { - ipath_err_t err; - const char *msg; - } errs[] = { - { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, - { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, - { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, - { INFINIPATH_E_SDMABASE, "SDmaBase" }, - { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, - { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, - { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, - { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, - { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, - { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, - { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, - { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, - }; - int i; - int expected; - size_t bidx = 0; - - for (i = 0; i < ARRAY_SIZE(errs); i++) { - expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : - test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - if ((err & errs[i].err) && !expected) - bidx += snprintf(buf + bidx, blen - bidx, - "%s ", errs[i].msg); - } -} - -/* - * Decode the error status into strings, deciding whether to always - * print * it or not depending on "normal packet errors" vs everything - * else. Return 1 if "real" errors, otherwise 0 if only packet - * errors, so caller can decide what to print with the string. - */ -int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, - ipath_err_t err) -{ - int iserr = 1; - *buf = '\0'; - if (err & INFINIPATH_E_PKTERRS) { - if (!(err & ~INFINIPATH_E_PKTERRS)) - iserr = 0; // if only packet errors. - if (ipath_debug & __IPATH_ERRPKTDBG) { - if (err & INFINIPATH_E_REBP) - strlcat(buf, "EBP ", blen); - if (err & INFINIPATH_E_RVCRC) - strlcat(buf, "VCRC ", blen); - if (err & INFINIPATH_E_RICRC) { - strlcat(buf, "CRC ", blen); - // clear for check below, so only once - err &= INFINIPATH_E_RICRC; - } - if (err & INFINIPATH_E_RSHORTPKTLEN) - strlcat(buf, "rshortpktlen ", blen); - if (err & INFINIPATH_E_SDROPPEDDATAPKT) - strlcat(buf, "sdroppeddatapkt ", blen); - if (err & INFINIPATH_E_SPKTLEN) - strlcat(buf, "spktlen ", blen); - } - if ((err & INFINIPATH_E_RICRC) && - !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) - strlcat(buf, "CRC ", blen); - if (!iserr) - goto done; - } - if (err & INFINIPATH_E_RHDRLEN) - strlcat(buf, "rhdrlen ", blen); - if (err & INFINIPATH_E_RBADTID) - strlcat(buf, "rbadtid ", blen); - if (err & INFINIPATH_E_RBADVERSION) - strlcat(buf, "rbadversion ", blen); - if (err & INFINIPATH_E_RHDR) - strlcat(buf, "rhdr ", blen); - if (err & INFINIPATH_E_SENDSPECIALTRIGGER) - strlcat(buf, "sendspecialtrigger ", blen); - if (err & INFINIPATH_E_RLONGPKTLEN) - strlcat(buf, "rlongpktlen ", blen); - if (err & INFINIPATH_E_RMAXPKTLEN) - strlcat(buf, "rmaxpktlen ", blen); - if (err & INFINIPATH_E_RMINPKTLEN) - strlcat(buf, "rminpktlen ", blen); - if (err & INFINIPATH_E_SMINPKTLEN) - strlcat(buf, "sminpktlen ", blen); - if (err & INFINIPATH_E_RFORMATERR) - strlcat(buf, "rformaterr ", blen); - if (err & INFINIPATH_E_RUNSUPVL) - strlcat(buf, "runsupvl ", blen); - if (err & INFINIPATH_E_RUNEXPCHAR) - strlcat(buf, "runexpchar ", blen); - if (err & INFINIPATH_E_RIBFLOW) - strlcat(buf, "ribflow ", blen); - if (err & INFINIPATH_E_SUNDERRUN) - strlcat(buf, "sunderrun ", blen); - if (err & INFINIPATH_E_SPIOARMLAUNCH) - strlcat(buf, "spioarmlaunch ", blen); - if (err & INFINIPATH_E_SUNEXPERRPKTNUM) - strlcat(buf, "sunexperrpktnum ", blen); - if (err & INFINIPATH_E_SDROPPEDSMPPKT) - strlcat(buf, "sdroppedsmppkt ", blen); - if (err & INFINIPATH_E_SMAXPKTLEN) - strlcat(buf, "smaxpktlen ", blen); - if (err & INFINIPATH_E_SUNSUPVL) - strlcat(buf, "sunsupVL ", blen); - if (err & INFINIPATH_E_INVALIDADDR) - strlcat(buf, "invalidaddr ", blen); - if (err & INFINIPATH_E_RRCVEGRFULL) - strlcat(buf, "rcvegrfull ", blen); - if (err & INFINIPATH_E_RRCVHDRFULL) - strlcat(buf, "rcvhdrfull ", blen); - if (err & INFINIPATH_E_IBSTATUSCHANGED) - strlcat(buf, "ibcstatuschg ", blen); - if (err & INFINIPATH_E_RIBLOSTLINK) - strlcat(buf, "riblostlink ", blen); - if (err & INFINIPATH_E_HARDWARE) - strlcat(buf, "hardware ", blen); - if (err & INFINIPATH_E_RESET) - strlcat(buf, "reset ", blen); - if (err & INFINIPATH_E_SDMAERRS) - decode_sdma_errs(dd, err, buf, blen); - if (err & INFINIPATH_E_INVALIDEEPCMD) - strlcat(buf, "invalideepromcmd ", blen); -done: - return iserr; -} - -/** - * get_rhf_errstring - decode RHF errors - * @err: the err number - * @msg: the output buffer - * @len: the length of the output buffer - * - * only used one place now, may want more later - */ -static void get_rhf_errstring(u32 err, char *msg, size_t len) -{ - /* if no errors, and so don't need to check what's first */ - *msg = '\0'; - - if (err & INFINIPATH_RHF_H_ICRCERR) - strlcat(msg, "icrcerr ", len); - if (err & INFINIPATH_RHF_H_VCRCERR) - strlcat(msg, "vcrcerr ", len); - if (err & INFINIPATH_RHF_H_PARITYERR) - strlcat(msg, "parityerr ", len); - if (err & INFINIPATH_RHF_H_LENERR) - strlcat(msg, "lenerr ", len); - if (err & INFINIPATH_RHF_H_MTUERR) - strlcat(msg, "mtuerr ", len); - if (err & INFINIPATH_RHF_H_IHDRERR) - /* infinipath hdr checksum error */ - strlcat(msg, "ipathhdrerr ", len); - if (err & INFINIPATH_RHF_H_TIDERR) - strlcat(msg, "tiderr ", len); - if (err & INFINIPATH_RHF_H_MKERR) - /* bad port, offset, etc. */ - strlcat(msg, "invalid ipathhdr ", len); - if (err & INFINIPATH_RHF_H_IBERR) - strlcat(msg, "iberr ", len); - if (err & INFINIPATH_RHF_L_SWA) - strlcat(msg, "swA ", len); - if (err & INFINIPATH_RHF_L_SWB) - strlcat(msg, "swB ", len); -} - -/** - * ipath_get_egrbuf - get an eager buffer - * @dd: the infinipath device - * @bufnum: the eager buffer to get - * - * must only be called if ipath_pd[port] is known to be allocated - */ -static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum) -{ - return dd->ipath_port0_skbinfo ? - (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; -} - -/** - * ipath_alloc_skb - allocate an skb and buffer with possible constraints - * @dd: the infinipath device - * @gfp_mask: the sk_buff SFP mask - */ -struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, - gfp_t gfp_mask) -{ - struct sk_buff *skb; - u32 len; - - /* - * Only fully supported way to handle this is to allocate lots - * extra, align as needed, and then do skb_reserve(). That wastes - * a lot of memory... I'll have to hack this into infinipath_copy - * also. - */ - - /* - * We need 2 extra bytes for ipath_ether data sent in the - * key header. In order to keep everything dword aligned, - * we'll reserve 4 bytes. - */ - len = dd->ipath_ibmaxlen + 4; - - if (dd->ipath_flags & IPATH_4BYTE_TID) { - /* We need a 2KB multiple alignment, and there is no way - * to do it except to allocate extra and then skb_reserve - * enough to bring it up to the right alignment. - */ - len += 2047; - } - - skb = __dev_alloc_skb(len, gfp_mask); - if (!skb) { - ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", - len); - goto bail; - } - - skb_reserve(skb, 4); - - if (dd->ipath_flags & IPATH_4BYTE_TID) { - u32 una = (unsigned long)skb->data & 2047; - if (una) - skb_reserve(skb, 2048 - una); - } - -bail: - return skb; -} - -static void ipath_rcv_hdrerr(struct ipath_devdata *dd, - u32 eflags, - u32 l, - u32 etail, - __le32 *rhf_addr, - struct ipath_message_header *hdr) -{ - char emsg[128]; - - get_rhf_errstring(eflags, emsg, sizeof emsg); - ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " - "tlen=%x opcode=%x egridx=%x: %s\n", - eflags, l, - ipath_hdrget_rcv_type(rhf_addr), - ipath_hdrget_length_in_bytes(rhf_addr), - be32_to_cpu(hdr->bth[0]) >> 24, - etail, emsg); - - /* Count local link integrity errors. */ - if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) { - u8 n = (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - - if (++dd->ipath_lli_counter > n) { - dd->ipath_lli_counter = 0; - dd->ipath_lli_errors++; - } - } -} - -/* - * ipath_kreceive - receive a packet - * @pd: the infinipath port - * - * called from interrupt handler for errors or receive interrupt - */ -void ipath_kreceive(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - __le32 *rhf_addr; - void *ebuf; - const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ - const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ - u32 etail = -1, l, hdrqtail; - struct ipath_message_header *hdr; - u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0; - static u64 totcalls; /* stats, may eventually remove */ - int last; - - l = pd->port_head; - rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset; - if (dd->ipath_flags & IPATH_NODMA_RTAIL) { - u32 seq = ipath_hdrget_seq(rhf_addr); - - if (seq != pd->port_seq_cnt) - goto bail; - hdrqtail = 0; - } else { - hdrqtail = ipath_get_rcvhdrtail(pd); - if (l == hdrqtail) - goto bail; - smp_rmb(); - } - -reloop: - for (last = 0, i = 1; !last; i += !last) { - hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); - eflags = ipath_hdrget_err_flags(rhf_addr); - etype = ipath_hdrget_rcv_type(rhf_addr); - /* total length */ - tlen = ipath_hdrget_length_in_bytes(rhf_addr); - ebuf = NULL; - if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ? - ipath_hdrget_use_egr_buf(rhf_addr) : - (etype != RCVHQ_RCV_TYPE_EXPECTED)) { - /* - * It turns out that the chip uses an eager buffer - * for all non-expected packets, whether it "needs" - * one or not. So always get the index, but don't - * set ebuf (so we try to copy data) unless the - * length requires it. - */ - etail = ipath_hdrget_index(rhf_addr); - updegr = 1; - if (tlen > sizeof(*hdr) || - etype == RCVHQ_RCV_TYPE_NON_KD) - ebuf = ipath_get_egrbuf(dd, etail); - } - - /* - * both tiderr and ipathhdrerr are set for all plain IB - * packets; only ipathhdrerr should be set. - */ - - if (etype != RCVHQ_RCV_TYPE_NON_KD && - etype != RCVHQ_RCV_TYPE_ERROR && - ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) != - IPS_PROTO_VERSION) - ipath_cdbg(PKT, "Bad InfiniPath protocol version " - "%x\n", etype); - - if (unlikely(eflags)) - ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr); - else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen); - if (dd->ipath_lli_counter) - dd->ipath_lli_counter--; - } else if (etype == RCVHQ_RCV_TYPE_EAGER) { - u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24; - u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff; - ipath_cdbg(PKT, "typ %x, opcode %x (eager, " - "qp=%x), len %x; ignored\n", - etype, opcode, qp, tlen); - } - else if (etype == RCVHQ_RCV_TYPE_EXPECTED) - ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", - be32_to_cpu(hdr->bth[0]) >> 24); - else { - /* - * error packet, type of error unknown. - * Probably type 3, but we don't know, so don't - * even try to print the opcode, etc. - * Usually caused by a "bad packet", that has no - * BTH, when the LRH says it should. - */ - ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" - " %x, len %x hdrq+%x rhf: %Lx\n", - etail, tlen, l, (unsigned long long) - le64_to_cpu(*(__le64 *) rhf_addr)); - if (ipath_debug & __IPATH_ERRPKTDBG) { - u32 j, *d, dw = rsize-2; - if (rsize > (tlen>>2)) - dw = tlen>>2; - d = (u32 *)hdr; - printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n", - dw); - for (j = 0; j < dw; j++) - printk(KERN_DEBUG "%8x%s", d[j], - (j%8) == 7 ? "\n" : " "); - printk(KERN_DEBUG ".\n"); - } - } - l += rsize; - if (l >= maxcnt) - l = 0; - rhf_addr = (__le32 *) pd->port_rcvhdrq + - l + dd->ipath_rhf_offset; - if (dd->ipath_flags & IPATH_NODMA_RTAIL) { - u32 seq = ipath_hdrget_seq(rhf_addr); - - if (++pd->port_seq_cnt > 13) - pd->port_seq_cnt = 1; - if (seq != pd->port_seq_cnt) - last = 1; - } else if (l == hdrqtail) - last = 1; - /* - * update head regs on last packet, and every 16 packets. - * Reduce bus traffic, while still trying to prevent - * rcvhdrq overflows, for when the queue is nearly full - */ - if (last || !(i & 0xf)) { - u64 lval = l; - - /* request IBA6120 and 7220 interrupt only on last */ - if (last) - lval |= dd->ipath_rhdrhead_intr_off; - ipath_write_ureg(dd, ur_rcvhdrhead, lval, - pd->port_port); - if (updegr) { - ipath_write_ureg(dd, ur_rcvegrindexhead, - etail, pd->port_port); - updegr = 0; - } - } - } - - if (!dd->ipath_rhdrhead_intr_off && !reloop && - !(dd->ipath_flags & IPATH_NODMA_RTAIL)) { - /* IBA6110 workaround; we can have a race clearing chip - * interrupt with another interrupt about to be delivered, - * and can clear it before it is delivered on the GPIO - * workaround. By doing the extra check here for the - * in-memory tail register updating while we were doing - * earlier packets, we "almost" guarantee we have covered - * that case. - */ - u32 hqtail = ipath_get_rcvhdrtail(pd); - if (hqtail != hdrqtail) { - hdrqtail = hqtail; - reloop = 1; /* loop 1 extra time at most */ - goto reloop; - } - } - - pkttot += i; - - pd->port_head = l; - - if (pkttot > ipath_stats.sps_maxpkts_call) - ipath_stats.sps_maxpkts_call = pkttot; - ipath_stats.sps_port0pkts += pkttot; - ipath_stats.sps_avgpkts_call = - ipath_stats.sps_port0pkts / ++totcalls; - -bail:; -} - -/** - * ipath_update_pio_bufs - update shadow copy of the PIO availability map - * @dd: the infinipath device - * - * called whenever our local copy indicates we have run out of send buffers - * NOTE: This can be called from interrupt context by some code - * and from non-interrupt context by ipath_getpiobuf(). - */ - -static void ipath_update_pio_bufs(struct ipath_devdata *dd) -{ - unsigned long flags; - int i; - const unsigned piobregs = (unsigned)dd->ipath_pioavregs; - - /* If the generation (check) bits have changed, then we update the - * busy bit for the corresponding PIO buffer. This algorithm will - * modify positions to the value they already have in some cases - * (i.e., no change), but it's faster than changing only the bits - * that have changed. - * - * We would like to do this atomicly, to avoid spinlocks in the - * critical send path, but that's not really possible, given the - * type of changes, and that this routine could be called on - * multiple cpu's simultaneously, so we lock in this routine only, - * to avoid conflicting updates; all we change is the shadow, and - * it's a single 64 bit memory location, so by definition the update - * is atomic in terms of what other cpu's can see in testing the - * bits. The spin_lock overhead isn't too bad, since it only - * happens when all buffers are in use, so only cpu overhead, not - * latency or bandwidth is affected. - */ - if (!dd->ipath_pioavailregs_dma) { - ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); - return; - } - if (ipath_debug & __IPATH_VERBDBG) { - /* only if packet debug and verbose */ - volatile __le64 *dma = dd->ipath_pioavailregs_dma; - unsigned long *shadow = dd->ipath_pioavailshadow; - - ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, " - "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx " - "s3=%lx\n", - (unsigned long long) le64_to_cpu(dma[0]), - shadow[0], - (unsigned long long) le64_to_cpu(dma[1]), - shadow[1], - (unsigned long long) le64_to_cpu(dma[2]), - shadow[2], - (unsigned long long) le64_to_cpu(dma[3]), - shadow[3]); - if (piobregs > 4) - ipath_cdbg( - PKT, "2nd group, dma4=%llx shad4=%lx, " - "d5=%llx s5=%lx, d6=%llx s6=%lx, " - "d7=%llx s7=%lx\n", - (unsigned long long) le64_to_cpu(dma[4]), - shadow[4], - (unsigned long long) le64_to_cpu(dma[5]), - shadow[5], - (unsigned long long) le64_to_cpu(dma[6]), - shadow[6], - (unsigned long long) le64_to_cpu(dma[7]), - shadow[7]); - } - spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (i = 0; i < piobregs; i++) { - u64 pchbusy, pchg, piov, pnew; - /* - * Chip Errata: bug 6641; even and odd qwords>3 are swapped - */ - if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) - piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); - else - piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); - pchg = dd->ipath_pioavailkernel[i] & - ~(dd->ipath_pioavailshadow[i] ^ piov); - pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; - if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { - pnew = dd->ipath_pioavailshadow[i] & ~pchbusy; - pnew |= piov & pchbusy; - dd->ipath_pioavailshadow[i] = pnew; - } - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); -} - -/* - * used to force update of pioavailshadow if we can't get a pio buffer. - * Needed primarily due to exitting freeze mode after recovering - * from errors. Done lazily, because it's safer (known to not - * be writing pio buffers). - */ -static void ipath_reset_availshadow(struct ipath_devdata *dd) -{ - int i, im; - unsigned long flags; - - spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (i = 0; i < dd->ipath_pioavregs; i++) { - u64 val, oldval; - /* deal with 6110 chip bug on high register #s */ - im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? - i ^ 1 : i; - val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); - /* - * busy out the buffers not in the kernel avail list, - * without changing the generation bits. - */ - oldval = dd->ipath_pioavailshadow[i]; - dd->ipath_pioavailshadow[i] = val | - ((~dd->ipath_pioavailkernel[i] << - INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & - 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ - if (oldval != dd->ipath_pioavailshadow[i]) - ipath_dbg("shadow[%d] was %Lx, now %lx\n", - i, (unsigned long long) oldval, - dd->ipath_pioavailshadow[i]); - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); -} - -/** - * ipath_setrcvhdrsize - set the receive header size - * @dd: the infinipath device - * @rhdrsize: the receive header size - * - * called from user init code, and also layered driver init - */ -int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) -{ - int ret = 0; - - if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) { - if (dd->ipath_rcvhdrsize != rhdrsize) { - dev_info(&dd->pcidev->dev, - "Error: can't set protocol header " - "size %u, already %u\n", - rhdrsize, dd->ipath_rcvhdrsize); - ret = -EAGAIN; - } else - ipath_cdbg(VERBOSE, "Reuse same protocol header " - "size %u\n", dd->ipath_rcvhdrsize); - } else if (rhdrsize > (dd->ipath_rcvhdrentsize - - (sizeof(u64) / sizeof(u32)))) { - ipath_dbg("Error: can't set protocol header size %u " - "(> max %u)\n", rhdrsize, - dd->ipath_rcvhdrentsize - - (u32) (sizeof(u64) / sizeof(u32))); - ret = -EOVERFLOW; - } else { - dd->ipath_flags |= IPATH_RCVHDRSZ_SET; - dd->ipath_rcvhdrsize = rhdrsize; - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, - dd->ipath_rcvhdrsize); - ipath_cdbg(VERBOSE, "Set protocol header size to %u\n", - dd->ipath_rcvhdrsize); - } - return ret; -} - -/* - * debugging code and stats updates if no pio buffers available. - */ -static noinline void no_pio_bufs(struct ipath_devdata *dd) -{ - unsigned long *shadow = dd->ipath_pioavailshadow; - __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma; - - dd->ipath_upd_pio_shadow = 1; - - /* - * not atomic, but if we lose a stat count in a while, that's OK - */ - ipath_stats.sps_nopiobufs++; - if (!(++dd->ipath_consec_nopiobuf % 100000)) { - ipath_force_pio_avail_update(dd); /* at start */ - ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " - "%llx %llx %llx %llx\n" - "ipath shadow: %lx %lx %lx %lx\n", - dd->ipath_consec_nopiobuf, - (unsigned long)get_cycles(), - (unsigned long long) le64_to_cpu(dma[0]), - (unsigned long long) le64_to_cpu(dma[1]), - (unsigned long long) le64_to_cpu(dma[2]), - (unsigned long long) le64_to_cpu(dma[3]), - shadow[0], shadow[1], shadow[2], shadow[3]); - /* - * 4 buffers per byte, 4 registers above, cover rest - * below - */ - if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > - (sizeof(shadow[0]) * 4 * 4)) - ipath_dbg("2nd group: dmacopy: " - "%llx %llx %llx %llx\n" - "ipath shadow: %lx %lx %lx %lx\n", - (unsigned long long)le64_to_cpu(dma[4]), - (unsigned long long)le64_to_cpu(dma[5]), - (unsigned long long)le64_to_cpu(dma[6]), - (unsigned long long)le64_to_cpu(dma[7]), - shadow[4], shadow[5], shadow[6], shadow[7]); - - /* at end, so update likely happened */ - ipath_reset_availshadow(dd); - } -} - -/* - * common code for normal driver pio buffer allocation, and reserved - * allocation. - * - * do appropriate marking as busy, etc. - * returns buffer number if one found (>=0), negative number is error. - */ -static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd, - u32 *pbufnum, u32 first, u32 last, u32 firsti) -{ - int i, j, updated = 0; - unsigned piobcnt; - unsigned long flags; - unsigned long *shadow = dd->ipath_pioavailshadow; - u32 __iomem *buf; - - piobcnt = last - first; - if (dd->ipath_upd_pio_shadow) { - /* - * Minor optimization. If we had no buffers on last call, - * start out by doing the update; continue and do scan even - * if no buffers were updated, to be paranoid - */ - ipath_update_pio_bufs(dd); - updated++; - i = first; - } else - i = firsti; -rescan: - /* - * while test_and_set_bit() is atomic, we do that and then the - * change_bit(), and the pair is not. See if this is the cause - * of the remaining armlaunch errors. - */ - spin_lock_irqsave(&ipath_pioavail_lock, flags); - for (j = 0; j < piobcnt; j++, i++) { - if (i >= last) - i = first; - if (__test_and_set_bit((2 * i) + 1, shadow)) - continue; - /* flip generation bit */ - __change_bit(2 * i, shadow); - break; - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); - - if (j == piobcnt) { - if (!updated) { - /* - * first time through; shadow exhausted, but may be - * buffers available, try an update and then rescan. - */ - ipath_update_pio_bufs(dd); - updated++; - i = first; - goto rescan; - } else if (updated == 1 && piobcnt <= - ((dd->ipath_sendctrl - >> INFINIPATH_S_UPDTHRESH_SHIFT) & - INFINIPATH_S_UPDTHRESH_MASK)) { - /* - * for chips supporting and using the update - * threshold we need to force an update of the - * in-memory copy if the count is less than the - * thershold, then check one more time. - */ - ipath_force_pio_avail_update(dd); - ipath_update_pio_bufs(dd); - updated++; - i = first; - goto rescan; - } - - no_pio_bufs(dd); - buf = NULL; - } else { - if (i < dd->ipath_piobcnt2k) - buf = (u32 __iomem *) (dd->ipath_pio2kbase + - i * dd->ipath_palign); - else - buf = (u32 __iomem *) - (dd->ipath_pio4kbase + - (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); - if (pbufnum) - *pbufnum = i; - } - - return buf; -} - -/** - * ipath_getpiobuf - find an available pio buffer - * @dd: the infinipath device - * @plen: the size of the PIO buffer needed in 32-bit words - * @pbufnum: the buffer number is placed here - */ -u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) -{ - u32 __iomem *buf; - u32 pnum, nbufs; - u32 first, lasti; - - if (plen + 1 >= IPATH_SMALLBUF_DWORDS) { - first = dd->ipath_piobcnt2k; - lasti = dd->ipath_lastpioindexl; - } else { - first = 0; - lasti = dd->ipath_lastpioindex; - } - nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti); - - if (buf) { - /* - * Set next starting place. It's just an optimization, - * it doesn't matter who wins on this, so no locking - */ - if (plen + 1 >= IPATH_SMALLBUF_DWORDS) - dd->ipath_lastpioindexl = pnum + 1; - else - dd->ipath_lastpioindex = pnum + 1; - if (dd->ipath_upd_pio_shadow) - dd->ipath_upd_pio_shadow = 0; - if (dd->ipath_consec_nopiobuf) - dd->ipath_consec_nopiobuf = 0; - ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", - pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf); - if (pbufnum) - *pbufnum = pnum; - - } - return buf; -} - -/** - * ipath_chg_pioavailkernel - change which send buffers are available for kernel - * @dd: the infinipath device - * @start: the starting send buffer number - * @len: the number of send buffers - * @avail: true if the buffers are available for kernel use, false otherwise - */ -void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, - unsigned len, int avail) -{ - unsigned long flags; - unsigned end, cnt = 0; - - /* There are two bits per send buffer (busy and generation) */ - start *= 2; - end = start + len * 2; - - spin_lock_irqsave(&ipath_pioavail_lock, flags); - /* Set or clear the busy bit in the shadow. */ - while (start < end) { - if (avail) { - unsigned long dma; - int i, im; - /* - * the BUSY bit will never be set, because we disarm - * the user buffers before we hand them back to the - * kernel. We do have to make sure the generation - * bit is set correctly in shadow, since it could - * have changed many times while allocated to user. - * We can't use the bitmap functions on the full - * dma array because it is always little-endian, so - * we have to flip to host-order first. - * BITS_PER_LONG is slightly wrong, since it's - * always 64 bits per register in chip... - * We only work on 64 bit kernels, so that's OK. - */ - /* deal with 6110 chip bug on high register #s */ - i = start / BITS_PER_LONG; - im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? - i ^ 1 : i; - __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT - + start, dd->ipath_pioavailshadow); - dma = (unsigned long) le64_to_cpu( - dd->ipath_pioavailregs_dma[im]); - if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT - + start) % BITS_PER_LONG, &dma)) - __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT - + start, dd->ipath_pioavailshadow); - else - __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT - + start, dd->ipath_pioavailshadow); - __set_bit(start, dd->ipath_pioavailkernel); - } else { - __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, - dd->ipath_pioavailshadow); - __clear_bit(start, dd->ipath_pioavailkernel); - } - start += 2; - } - - if (dd->ipath_pioupd_thresh) { - end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); - cnt = bitmap_weight(dd->ipath_pioavailkernel, end); - } - spin_unlock_irqrestore(&ipath_pioavail_lock, flags); - - /* - * When moving buffers from kernel to user, if number assigned to - * the user is less than the pio update threshold, and threshold - * is supported (cnt was computed > 0), drop the update threshold - * so we update at least once per allocated number of buffers. - * In any case, if the kernel buffers are less than the threshold, - * drop the threshold. We don't bother increasing it, having once - * decreased it, since it would typically just cycle back and forth. - * If we don't decrease below buffers in use, we can wait a long - * time for an update, until some other context uses PIO buffers. - */ - if (!avail && len < cnt) - cnt = len; - if (cnt < dd->ipath_pioupd_thresh) { - dd->ipath_pioupd_thresh = cnt; - ipath_dbg("Decreased pio update threshold to %u\n", - dd->ipath_pioupd_thresh); - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK - << INFINIPATH_S_UPDTHRESH_SHIFT); - dd->ipath_sendctrl |= dd->ipath_pioupd_thresh - << INFINIPATH_S_UPDTHRESH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } -} - -/** - * ipath_create_rcvhdrq - create a receive header queue - * @dd: the infinipath device - * @pd: the port data - * - * this must be contiguous memory (from an i/o perspective), and must be - * DMA'able (which means for some systems, it will go through an IOMMU, - * or be forced into a low address range). - */ -int ipath_create_rcvhdrq(struct ipath_devdata *dd, - struct ipath_portdata *pd) -{ - int ret = 0; - - if (!pd->port_rcvhdrq) { - dma_addr_t phys_hdrqtail; - gfp_t gfp_flags = GFP_USER | __GFP_COMP; - int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE); - - pd->port_rcvhdrq = dma_alloc_coherent( - &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, - gfp_flags); - - if (!pd->port_rcvhdrq) { - ipath_dev_err(dd, "attempt to allocate %d bytes " - "for port %u rcvhdrq failed\n", - amt, pd->port_port); - ret = -ENOMEM; - goto bail; - } - - if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { - pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, - GFP_KERNEL); - if (!pd->port_rcvhdrtail_kvaddr) { - ipath_dev_err(dd, "attempt to allocate 1 page " - "for port %u rcvhdrqtailaddr " - "failed\n", pd->port_port); - ret = -ENOMEM; - dma_free_coherent(&dd->pcidev->dev, amt, - pd->port_rcvhdrq, - pd->port_rcvhdrq_phys); - pd->port_rcvhdrq = NULL; - goto bail; - } - pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; - ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx " - "physical\n", pd->port_port, - (unsigned long long) phys_hdrqtail); - } - - pd->port_rcvhdrq_size = amt; - - ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu " - "for port %u rcvhdr Q\n", - amt >> PAGE_SHIFT, pd->port_rcvhdrq, - (unsigned long) pd->port_rcvhdrq_phys, - (unsigned long) pd->port_rcvhdrq_size, - pd->port_port); - } - else - ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " - "hdrtailaddr@%p %llx physical\n", - pd->port_port, pd->port_rcvhdrq, - (unsigned long long) pd->port_rcvhdrq_phys, - pd->port_rcvhdrtail_kvaddr, (unsigned long long) - pd->port_rcvhdrqtailaddr_phys); - - /* clear for security and sanity on each use */ - memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); - if (pd->port_rcvhdrtail_kvaddr) - memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); - - /* - * tell chip each time we init it, even if we are re-using previous - * memory (we zero the register at process close) - */ - ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, - pd->port_port, pd->port_rcvhdrqtailaddr_phys); - ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, - pd->port_port, pd->port_rcvhdrq_phys); - -bail: - return ret; -} - - -/* - * Flush all sends that might be in the ready to send state, as well as any - * that are in the process of being sent. Used whenever we need to be - * sure the send side is idle. Cleans up all buffer state by canceling - * all pio buffers, and issuing an abort, which cleans up anything in the - * launch fifo. The cancel is superfluous on some chip versions, but - * it's safer to always do it. - * PIOAvail bits are updated by the chip as if normal send had happened. - */ -void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) -{ - unsigned long flags; - - if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { - ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); - goto bail; - } - /* - * If we have SDMA, and it's not disabled, we have to kick off the - * abort state machine, provided we aren't already aborting. - * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), - * we skip the rest of this routine. It is already "in progress" - */ - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { - int skip_cancel; - unsigned long *statp = &dd->ipath_sdma_status; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - skip_cancel = - test_and_set_bit(IPATH_SDMA_ABORTING, statp) - && !test_bit(IPATH_SDMA_DISABLED, statp); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (skip_cancel) - goto bail; - } - - ipath_dbg("Cancelling all in-progress send buffers\n"); - - /* skip armlaunch errs for a while */ - dd->ipath_lastcancel = jiffies + HZ / 2; - - /* - * The abort bit is auto-clearing. We also don't want pioavail - * update happening during this, and we don't want any other - * sends going out, so turn those off for the duration. We read - * the scratch register to be sure that cancels and the abort - * have taken effect in the chip. Otherwise two parts are same - * as ipath_force_pio_avail_update() - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD - | INFINIPATH_S_PIOENABLE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl | INFINIPATH_S_ABORT); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* disarm all send buffers */ - ipath_disarm_piobufs(dd, 0, - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); - - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); - - if (restore_sendctrl) { - /* else done by caller later if needed */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | - INFINIPATH_S_PIOENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - /* and again, be sure all have hit the chip */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } - - if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && - !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && - test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - /* only wait so long for intr */ - dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; - dd->ipath_sdma_reset_wait = 200; - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - tasklet_hi_schedule(&dd->ipath_sdma_abort_task); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - } -bail:; -} - -/* - * Force an update of in-memory copy of the pioavail registers, when - * needed for any of a variety of reasons. We read the scratch register - * to make it highly likely that the update will have happened by the - * time we return. If already off (as in cancel_sends above), this - * routine is a nop, on the assumption that the caller will "do the - * right thing". - */ -void ipath_force_pio_avail_update(struct ipath_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - } - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); -} - -static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, - int linitcmd) -{ - u64 mod_wd; - static const char *what[4] = { - [0] = "NOP", - [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", - [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", - [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" - }; - - if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) { - /* - * If we are told to disable, note that so link-recovery - * code does not attempt to bring us back up. - */ - preempt_disable(); - dd->ipath_flags |= IPATH_IB_LINK_DISABLED; - preempt_enable(); - } else if (linitcmd) { - /* - * Any other linkinitcmd will lead to LINKDOWN and then - * to INIT (if all is well), so clear flag to let - * link-recovery code attempt to bring us back up. - */ - preempt_disable(); - dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; - preempt_enable(); - } - - mod_wd = (linkcmd << dd->ibcc_lc_shift) | - (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); - ipath_cdbg(VERBOSE, - "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", - dd->ipath_unit, what[linkcmd], linitcmd, - ipath_ibcstatus_str[ipath_ib_linktrstate(dd, - ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl | mod_wd); - /* read from chip so write is flushed */ - (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); -} - -int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) -{ - u32 lstate; - int ret; - - switch (newstate) { - case IPATH_IB_LINKDOWN_ONLY: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, - INFINIPATH_IBCC_LINKINITCMD_POLL); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_SLEEP: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, - INFINIPATH_IBCC_LINKINITCMD_SLEEP); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_DISABLE: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, - INFINIPATH_IBCC_LINKINITCMD_DISABLE); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKARM: - if (dd->ipath_flags & IPATH_LINKARMED) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & - (IPATH_LINKINIT | IPATH_LINKACTIVE))) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); - - /* - * Since the port can transition to ACTIVE by receiving - * a non VL 15 packet, wait for either state. - */ - lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINKACTIVE: - if (dd->ipath_flags & IPATH_LINKACTIVE) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & IPATH_LINKARMED)) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); - lstate = IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINK_LOOPBACK: - dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - - /* turn heartbeat off, as it causes loopback to fail */ - dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_OFF); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINK_EXTERNAL: - dev_info(&dd->pcidev->dev, - "Disabling IB local loopback (normal)\n"); - dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_ON); - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - /* don't wait */ - ret = 0; - goto bail; - - /* - * Heartbeat can be explicitly enabled by the user via - * "hrtbt_enable" "file", and if disabled, trying to enable here - * will have no effect. Implicit changes (heartbeat off when - * loopback on, and vice versa) are included to ease testing. - */ - case IPATH_IB_LINK_HRTBT: - ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_ON); - goto bail; - - case IPATH_IB_LINK_NO_HRTBT: - ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, - IPATH_IB_HRTBT_OFF); - goto bail; - - default: - ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); - ret = -EINVAL; - goto bail; - } - ret = ipath_wait_linkstate(dd, lstate, 2000); - -bail: - return ret; -} - -/** - * ipath_set_mtu - set the MTU - * @dd: the infinipath device - * @arg: the new MTU - * - * we can handle "any" incoming size, the issue here is whether we - * need to restrict our outgoing size. For now, we don't do any - * sanity checking on this, and we don't deal with what happens to - * programs that are already running when the size changes. - * NOTE: changing the MTU will usually cause the IBC to go back to - * link INIT state... - */ -int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) -{ - u32 piosize; - int changed = 0; - int ret; - - /* - * mtu is IB data payload max. It's the largest power of 2 less - * than piosize (or even larger, since it only really controls the - * largest we can receive; we can send the max of the mtu and - * piosize). We check that it's one of the valid IB sizes. - */ - if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && - (arg != 4096 || !ipath_mtu4096)) { - ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); - ret = -EINVAL; - goto bail; - } - if (dd->ipath_ibmtu == arg) { - ret = 0; /* same as current */ - goto bail; - } - - piosize = dd->ipath_ibmaxlen; - dd->ipath_ibmtu = arg; - - if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { - /* Only if it's not the initial value (or reset to it) */ - if (piosize != dd->ipath_init_ibmaxlen) { - if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) - piosize = dd->ipath_init_ibmaxlen; - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { - piosize = arg + IPATH_PIO_MAXIBHDR; - ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " - "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, - arg); - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - - if (changed) { - u64 ibc = dd->ipath_ibcctrl, ibdw; - /* - * update our housekeeping variables, and set IBC max - * size, same as init code; max IBC is max we allow in - * buffer, less the qword pbc, plus 1 for ICRC, in dwords - */ - dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); - ibdw = (dd->ipath_ibmaxlen >> 2) + 1; - ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << - dd->ibcc_mpl_shift); - ibc |= ibdw << dd->ibcc_mpl_shift; - dd->ipath_ibcctrl = ibc; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - dd->ipath_f_tidtemplate(dd); - } - - ret = 0; - -bail: - return ret; -} - -int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) -{ - dd->ipath_lid = lid; - dd->ipath_lmc = lmc; - - dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | - (~((1U << lmc) - 1)) << 16); - - dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); - - return 0; -} - - -/** - * ipath_write_kreg_port - write a device's per-port 64-bit kernel register - * @dd: the infinipath device - * @regno: the register number to write - * @port: the port containing the register - * @value: the value to write - * - * Registers that vary with the chip implementation constants (port) - * use this routine. - */ -void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, - unsigned port, u64 value) -{ - u16 where; - - if (port < dd->ipath_portcnt && - (regno == dd->ipath_kregs->kr_rcvhdraddr || - regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) - where = regno + port; - else - where = -1; - - ipath_write_kreg(dd, where, value); -} - -/* - * Following deal with the "obviously simple" task of overriding the state - * of the LEDS, which normally indicate link physical and logical status. - * The complications arise in dealing with different hardware mappings - * and the board-dependent routine being called from interrupts. - * and then there's the requirement to _flash_ them. - */ -#define LED_OVER_FREQ_SHIFT 8 -#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) -/* Below is "non-zero" to force override, but both actual LEDs are off */ -#define LED_OVER_BOTH_OFF (8) - -static void ipath_run_led_override(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - int timeoff; - int pidx; - u64 lstate, ltstate, val; - - if (!(dd->ipath_flags & IPATH_INITTED)) - return; - - pidx = dd->ipath_led_override_phase++ & 1; - dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; - timeoff = dd->ipath_led_override_timeoff; - - /* - * below potentially restores the LED values per current status, - * should also possibly setup the traffic-blink register, - * but leave that to per-chip functions. - */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ltstate = ipath_ib_linktrstate(dd, val); - lstate = ipath_ib_linkstate(dd, val); - - dd->ipath_f_setextled(dd, lstate, ltstate); - mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); -} - -void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) -{ - int timeoff, freq; - - if (!(dd->ipath_flags & IPATH_INITTED)) - return; - - /* First check if we are blinking. If not, use 1HZ polling */ - timeoff = HZ; - freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; - - if (freq) { - /* For blink, set each phase from one nybble of val */ - dd->ipath_led_override_vals[0] = val & 0xF; - dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; - timeoff = (HZ << 4)/freq; - } else { - /* Non-blink set both phases the same. */ - dd->ipath_led_override_vals[0] = val & 0xF; - dd->ipath_led_override_vals[1] = val & 0xF; - } - dd->ipath_led_override_timeoff = timeoff; - - /* - * If the timer has not already been started, do so. Use a "quick" - * timeout so the function will be called soon, to look at our request. - */ - if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { - /* Need to start timer */ - init_timer(&dd->ipath_led_override_timer); - dd->ipath_led_override_timer.function = - ipath_run_led_override; - dd->ipath_led_override_timer.data = (unsigned long) dd; - dd->ipath_led_override_timer.expires = jiffies + 1; - add_timer(&dd->ipath_led_override_timer); - } else - atomic_dec(&dd->ipath_led_override_timer_active); -} - -/** - * ipath_shutdown_device - shut down a device - * @dd: the infinipath device - * - * This is called to make the device quiet when we are about to - * unload the driver, and also when the device is administratively - * disabled. It does not free any data structures. - * Everything it does has to be setup again by ipath_init_chip(dd,1) - */ -void ipath_shutdown_device(struct ipath_devdata *dd) -{ - unsigned long flags; - - ipath_dbg("Shutting down the device\n"); - - ipath_hol_up(dd); /* make sure user processes aren't suspended */ - - dd->ipath_flags |= IPATH_LINKUNK; - dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | - IPATH_LINKINIT | IPATH_LINKARMED | - IPATH_LINKACTIVE); - *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF | - IPATH_STATUS_IB_READY); - - /* mask interrupts, but not errors */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - - dd->ipath_rcvctrl = 0; - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - teardown_sdma(dd); - - /* - * gracefully stop all sends allowing any in progress to trickle out - * first. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl = 0; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - /* flush it */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* - * enough for anything that's going to trickle out to have actually - * done so. - */ - udelay(5); - - dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ - - ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); - ipath_cancel_sends(dd, 0); - - /* - * we are shutting down, so tell components that care. We don't do - * this on just a link state change, much like ethernet, a cable - * unplug, etc. doesn't change driver state - */ - signal_ib_event(dd, IB_EVENT_PORT_ERR); - - /* disable IBC */ - dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control | INFINIPATH_C_FREEZEMODE); - - /* - * clear SerdesEnable and turn the leds off; do this here because - * we are unloading, so don't count on interrupts to move along - * Turn the LEDs off explicitly for the same reason. - */ - dd->ipath_f_quiet_serdes(dd); - - /* stop all the timers that might still be running */ - del_timer_sync(&dd->ipath_hol_timer); - if (dd->ipath_stats_timer_active) { - del_timer_sync(&dd->ipath_stats_timer); - dd->ipath_stats_timer_active = 0; - } - if (dd->ipath_intrchk_timer.data) { - del_timer_sync(&dd->ipath_intrchk_timer); - dd->ipath_intrchk_timer.data = 0; - } - if (atomic_read(&dd->ipath_led_override_timer_active)) { - del_timer_sync(&dd->ipath_led_override_timer); - atomic_set(&dd->ipath_led_override_timer_active, 0); - } - - /* - * clear all interrupts and errors, so that the next time the driver - * is loaded or device is enabled, we know that whatever is set - * happened while we were unloaded - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); - - ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); - ipath_update_eeprom_log(dd); -} - -/** - * ipath_free_pddata - free a port's allocated data - * @dd: the infinipath device - * @pd: the portdata structure - * - * free up any allocated data for a port - * This should not touch anything that would affect a simultaneous - * re-allocation of port data, because it is called after ipath_mutex - * is released (and can be called from reinit as well). - * It should never change any chip state, or global driver state. - * (The only exception to global state is freeing the port0 port0_skbs.) - */ -void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) -{ - if (!pd) - return; - - if (pd->port_rcvhdrq) { - ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " - "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, - (unsigned long) pd->port_rcvhdrq_size); - dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, - pd->port_rcvhdrq, pd->port_rcvhdrq_phys); - pd->port_rcvhdrq = NULL; - if (pd->port_rcvhdrtail_kvaddr) { - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - pd->port_rcvhdrtail_kvaddr, - pd->port_rcvhdrqtailaddr_phys); - pd->port_rcvhdrtail_kvaddr = NULL; - } - } - if (pd->port_port && pd->port_rcvegrbuf) { - unsigned e; - - for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { - void *base = pd->port_rcvegrbuf[e]; - size_t size = pd->port_rcvegrbuf_size; - - ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " - "chunk %u/%u\n", base, - (unsigned long) size, - e, pd->port_rcvegrbuf_chunks); - dma_free_coherent(&dd->pcidev->dev, size, - base, pd->port_rcvegrbuf_phys[e]); - } - kfree(pd->port_rcvegrbuf); - pd->port_rcvegrbuf = NULL; - kfree(pd->port_rcvegrbuf_phys); - pd->port_rcvegrbuf_phys = NULL; - pd->port_rcvegrbuf_chunks = 0; - } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { - unsigned e; - struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; - - dd->ipath_port0_skbinfo = NULL; - ipath_cdbg(VERBOSE, "free closed port %d " - "ipath_port0_skbinfo @ %p\n", pd->port_port, - skbinfo); - for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) - if (skbinfo[e].skb) { - pci_unmap_single(dd->pcidev, skbinfo[e].phys, - dd->ipath_ibmaxlen, - PCI_DMA_FROMDEVICE); - dev_kfree_skb(skbinfo[e].skb); - } - vfree(skbinfo); - } - kfree(pd->port_tid_pg_list); - vfree(pd->subport_uregbase); - vfree(pd->subport_rcvegrbuf); - vfree(pd->subport_rcvhdr_base); - kfree(pd); -} - -static int __init infinipath_init(void) -{ - int ret; - - if (ipath_debug & __IPATH_DBG) - printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); - - /* - * These must be called before the driver is registered with - * the PCI subsystem. - */ - idr_init(&unit_table); - - ret = pci_register_driver(&ipath_driver); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME - ": Unable to register driver: error %d\n", -ret); - goto bail_unit; - } - - ret = ipath_init_ipathfs(); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " - "ipathfs: error %d\n", -ret); - goto bail_pci; - } - - goto bail; - -bail_pci: - pci_unregister_driver(&ipath_driver); - -bail_unit: - idr_destroy(&unit_table); - -bail: - return ret; -} - -static void __exit infinipath_cleanup(void) -{ - ipath_exit_ipathfs(); - - ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); - pci_unregister_driver(&ipath_driver); - - idr_destroy(&unit_table); -} - -/** - * ipath_reset_device - reset the chip if possible - * @unit: the device to reset - * - * Whether or not reset is successful, we attempt to re-initialize the chip - * (that is, much like a driver unload/reload). We clear the INITTED flag - * so that the various entry points will fail until we reinitialize. For - * now, we only allow this if no user ports are open that use chip resources - */ -int ipath_reset_device(int unit) -{ - int ret, i; - struct ipath_devdata *dd = ipath_lookup(unit); - unsigned long flags; - - if (!dd) { - ret = -ENODEV; - goto bail; - } - - if (atomic_read(&dd->ipath_led_override_timer_active)) { - /* Need to stop LED timer, _then_ shut off LEDs */ - del_timer_sync(&dd->ipath_led_override_timer); - atomic_set(&dd->ipath_led_override_timer_active, 0); - } - - /* Shut off LEDs after we are sure timer is not running */ - dd->ipath_led_override = LED_OVER_BOTH_OFF; - dd->ipath_f_setextled(dd, 0, 0); - - dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); - - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { - dev_info(&dd->pcidev->dev, "Invalid unit number %u or " - "not initialized or not present\n", unit); - ret = -ENXIO; - goto bail; - } - - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - if (dd->ipath_pd) - for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) - continue; - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - ipath_dbg("unit %u port %d is in use " - "(PID %u cmd %s), can't reset\n", - unit, i, - pid_nr(dd->ipath_pd[i]->port_pid), - dd->ipath_pd[i]->port_comm); - ret = -EBUSY; - goto bail; - } - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - teardown_sdma(dd); - - dd->ipath_flags &= ~IPATH_INITTED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - ret = dd->ipath_f_reset(dd); - if (ret == 1) { - ipath_dbg("Reinitializing unit %u after reset attempt\n", - unit); - ret = ipath_init_chip(dd, 1); - } else - ret = -EAGAIN; - if (ret) - ipath_dev_err(dd, "Reinitialize unit %u after " - "reset failed with %d\n", unit, ret); - else - dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " - "resetting\n", unit); - -bail: - return ret; -} - -/* - * send a signal to all the processes that have the driver open - * through the normal interfaces (i.e., everything other than diags - * interface). Returns number of signalled processes. - */ -static int ipath_signal_procs(struct ipath_devdata *dd, int sig) -{ - int i, sub, any = 0; - struct pid *pid; - unsigned long flags; - - if (!dd->ipath_pd) - return 0; - - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) - continue; - pid = dd->ipath_pd[i]->port_pid; - if (!pid) - continue; - - dev_info(&dd->pcidev->dev, "context %d in use " - "(PID %u), sending signal %d\n", - i, pid_nr(pid), sig); - kill_pid(pid, sig, 1); - any++; - for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { - pid = dd->ipath_pd[i]->port_subpid[sub]; - if (!pid) - continue; - dev_info(&dd->pcidev->dev, "sub-context " - "%d:%d in use (PID %u), sending " - "signal %d\n", i, sub, pid_nr(pid), sig); - kill_pid(pid, sig, 1); - any++; - } - } - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - return any; -} - -static void ipath_hol_signal_down(struct ipath_devdata *dd) -{ - if (ipath_signal_procs(dd, SIGSTOP)) - ipath_dbg("Stopped some processes\n"); - ipath_cancel_sends(dd, 1); -} - - -static void ipath_hol_signal_up(struct ipath_devdata *dd) -{ - if (ipath_signal_procs(dd, SIGCONT)) - ipath_dbg("Continued some processes\n"); -} - -/* - * link is down, stop any users processes, and flush pending sends - * to prevent HoL blocking, then start the HoL timer that - * periodically continues, then stop procs, so they can detect - * link down if they want, and do something about it. - * Timer may already be running, so use mod_timer, not add_timer. - */ -void ipath_hol_down(struct ipath_devdata *dd) -{ - dd->ipath_hol_state = IPATH_HOL_DOWN; - ipath_hol_signal_down(dd); - dd->ipath_hol_next = IPATH_HOL_DOWNCONT; - dd->ipath_hol_timer.expires = jiffies + - msecs_to_jiffies(ipath_hol_timeout_ms); - mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); -} - -/* - * link is up, continue any user processes, and ensure timer - * is a nop, if running. Let timer keep running, if set; it - * will nop when it sees the link is up - */ -void ipath_hol_up(struct ipath_devdata *dd) -{ - ipath_hol_signal_up(dd); - dd->ipath_hol_state = IPATH_HOL_UP; -} - -/* - * toggle the running/not running state of user proceses - * to prevent HoL blocking on chip resources, but still allow - * user processes to do link down special case handling. - * Should only be called via the timer - */ -void ipath_hol_event(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP - && dd->ipath_hol_state != IPATH_HOL_UP) { - dd->ipath_hol_next = IPATH_HOL_DOWNCONT; - ipath_dbg("Stopping processes\n"); - ipath_hol_signal_down(dd); - } else { /* may do "extra" if also in ipath_hol_up() */ - dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; - ipath_dbg("Continuing processes\n"); - ipath_hol_signal_up(dd); - } - if (dd->ipath_hol_state == IPATH_HOL_UP) - ipath_dbg("link's up, don't resched timer\n"); - else { - dd->ipath_hol_timer.expires = jiffies + - msecs_to_jiffies(ipath_hol_timeout_ms); - mod_timer(&dd->ipath_hol_timer, - dd->ipath_hol_timer.expires); - } -} - -int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) -{ - u64 val; - - if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) - return -1; - if (dd->ipath_rx_pol_inv != new_pol_inv) { - dd->ipath_rx_pol_inv = new_pol_inv; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val &= ~(INFINIPATH_XGXS_RX_POL_MASK << - INFINIPATH_XGXS_RX_POL_SHIFT); - val |= ((u64)dd->ipath_rx_pol_inv) << - INFINIPATH_XGXS_RX_POL_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - } - return 0; -} - -/* - * Disable and enable the armlaunch error. Used for PIO bandwidth testing on - * the 7220, which is count-based, rather than trigger-based. Safe for the - * driver check, since it's at init. Not completely safe when used for - * user-mode checking, since some error checking can be lost, but not - * particularly risky, and only has problematic side-effects in the face of - * very buggy user code. There is no reference counting, but that's also - * fine, given the intended use. - */ -void ipath_enable_armlaunch(struct ipath_devdata *dd) -{ - dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, - INFINIPATH_E_SPIOARMLAUNCH); - dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); -} - -void ipath_disable_armlaunch(struct ipath_devdata *dd) -{ - /* so don't re-enable if already set */ - dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; - dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); -} - -module_init(infinipath_init); -module_exit(infinipath_cleanup); diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_eeprom.c b/kernel/drivers/infiniband/hw/ipath/ipath_eeprom.c deleted file mode 100644 index fc7181985..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ /dev/null @@ -1,1183 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> - -#include "ipath_kernel.h" - -/* - * InfiniPath I2C driver for a serial eeprom. This is not a generic - * I2C interface. For a start, the device we're using (Atmel AT24C11) - * doesn't work like a regular I2C device. It looks like one - * electrically, but not logically. Normal I2C devices have a single - * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit - * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78 - * to 0x7F are special reserved addresses (e.g. 0x00 is the "general - * call" address.) The Atmel device, on the other hand, responds to ALL - * 7-bit addresses. It's designed to be the only device on a given I2C - * bus. A 7-bit address corresponds to the memory address within the - * Atmel device itself. - * - * Also, the timing requirements mean more than simple software - * bitbanging, with readbacks from chip to ensure timing (simple udelay - * is not enough). - * - * This all means that accessing the device is specialized enough - * that using the standard kernel I2C bitbanging interface would be - * impossible. For example, the core I2C eeprom driver expects to find - * a device at one or more of a limited set of addresses only. It doesn't - * allow writing to an eeprom. It also doesn't provide any means of - * accessing eeprom contents from within the kernel, only via sysfs. - */ - -/* Added functionality for IBA7220-based cards */ -#define IPATH_EEPROM_DEV_V1 0xA0 -#define IPATH_EEPROM_DEV_V2 0xA2 -#define IPATH_TEMP_DEV 0x98 -#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2) -#define IPATH_NO_DEV (0xFF) - -/* - * The number of I2C chains is proliferating. Table below brings - * some order to the madness. The basic principle is that the - * table is scanned from the top, and a "probe" is made to the - * device probe_dev. If that succeeds, the chain is considered - * to be of that type, and dd->i2c_chain_type is set to the index+1 - * of the entry. - * The +1 is so static initialization can mean "unknown, do probe." - */ -static struct i2c_chain_desc { - u8 probe_dev; /* If seen at probe, chain is this type */ - u8 eeprom_dev; /* Dev addr (if any) for EEPROM */ - u8 temp_dev; /* Dev Addr (if any) for Temp-sense */ -} i2c_chains[] = { - { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */ - { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */ - { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */ - { IPATH_NO_DEV } -}; - -enum i2c_type { - i2c_line_scl = 0, - i2c_line_sda -}; - -enum i2c_state { - i2c_line_low = 0, - i2c_line_high -}; - -#define READ_CMD 1 -#define WRITE_CMD 0 - -/** - * i2c_gpio_set - set a GPIO line - * @dd: the infinipath device - * @line: the line to set - * @new_line_state: the state to set - * - * Returns 0 if the line was set to the new state successfully, non-zero - * on error. - */ -static int i2c_gpio_set(struct ipath_devdata *dd, - enum i2c_type line, - enum i2c_state new_line_state) -{ - u64 out_mask, dir_mask, *gpioval; - unsigned long flags = 0; - - gpioval = &dd->ipath_gpio_out; - - if (line == i2c_line_scl) { - dir_mask = dd->ipath_gpio_scl; - out_mask = (1UL << dd->ipath_gpio_scl_num); - } else { - dir_mask = dd->ipath_gpio_sda; - out_mask = (1UL << dd->ipath_gpio_sda_num); - } - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - if (new_line_state == i2c_line_high) { - /* tri-state the output rather than force high */ - dd->ipath_extctrl &= ~dir_mask; - } else { - /* config line to be an output */ - dd->ipath_extctrl |= dir_mask; - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); - - /* set output as well (no real verify) */ - if (new_line_state == i2c_line_high) - *gpioval |= out_mask; - else - *gpioval &= ~out_mask; - - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - return 0; -} - -/** - * i2c_gpio_get - get a GPIO line state - * @dd: the infinipath device - * @line: the line to get - * @curr_statep: where to put the line state - * - * Returns 0 if the line was set to the new state successfully, non-zero - * on error. curr_state is not set on error. - */ -static int i2c_gpio_get(struct ipath_devdata *dd, - enum i2c_type line, - enum i2c_state *curr_statep) -{ - u64 read_val, mask; - int ret; - unsigned long flags = 0; - - /* check args */ - if (curr_statep == NULL) { - ret = 1; - goto bail; - } - - /* config line to be an input */ - if (line == i2c_line_scl) - mask = dd->ipath_gpio_scl; - else - mask = dd->ipath_gpio_sda; - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - dd->ipath_extctrl &= ~mask; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); - /* - * Below is very unlikely to reflect true input state if Output - * Enable actually changed. - */ - read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - if (read_val & mask) - *curr_statep = i2c_line_high; - else - *curr_statep = i2c_line_low; - - ret = 0; - -bail: - return ret; -} - -/** - * i2c_wait_for_writes - wait for a write - * @dd: the infinipath device - * - * We use this instead of udelay directly, so we can make sure - * that previous register writes have been flushed all the way - * to the chip. Since we are delaying anyway, the cost doesn't - * hurt, and makes the bit twiddling more regular - */ -static void i2c_wait_for_writes(struct ipath_devdata *dd) -{ - (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - rmb(); -} - -static void scl_out(struct ipath_devdata *dd, u8 bit) -{ - udelay(1); - i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); - - i2c_wait_for_writes(dd); -} - -static void sda_out(struct ipath_devdata *dd, u8 bit) -{ - i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low); - - i2c_wait_for_writes(dd); -} - -static u8 sda_in(struct ipath_devdata *dd, int wait) -{ - enum i2c_state bit; - - if (i2c_gpio_get(dd, i2c_line_sda, &bit)) - ipath_dbg("get bit failed!\n"); - - if (wait) - i2c_wait_for_writes(dd); - - return bit == i2c_line_high ? 1U : 0; -} - -/** - * i2c_ackrcv - see if ack following write is true - * @dd: the infinipath device - */ -static int i2c_ackrcv(struct ipath_devdata *dd) -{ - u8 ack_received; - - /* AT ENTRY SCL = LOW */ - /* change direction, ignore data */ - ack_received = sda_in(dd, 1); - scl_out(dd, i2c_line_high); - ack_received = sda_in(dd, 1) == 0; - scl_out(dd, i2c_line_low); - return ack_received; -} - -/** - * rd_byte - read a byte, leaving ACK, STOP, etc up to caller - * @dd: the infinipath device - * - * Returns byte shifted out of device - */ -static int rd_byte(struct ipath_devdata *dd) -{ - int bit_cntr, data; - - data = 0; - - for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { - data <<= 1; - scl_out(dd, i2c_line_high); - data |= sda_in(dd, 0); - scl_out(dd, i2c_line_low); - } - return data; -} - -/** - * wr_byte - write a byte, one bit at a time - * @dd: the infinipath device - * @data: the byte to write - * - * Returns 0 if we got the following ack, otherwise 1 - */ -static int wr_byte(struct ipath_devdata *dd, u8 data) -{ - int bit_cntr; - u8 bit; - - for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { - bit = (data >> bit_cntr) & 1; - sda_out(dd, bit); - scl_out(dd, i2c_line_high); - scl_out(dd, i2c_line_low); - } - return (!i2c_ackrcv(dd)) ? 1 : 0; -} - -static void send_ack(struct ipath_devdata *dd) -{ - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_high); - scl_out(dd, i2c_line_low); - sda_out(dd, i2c_line_high); -} - -/** - * i2c_startcmd - transmit the start condition, followed by address/cmd - * @dd: the infinipath device - * @offset_dir: direction byte - * - * (both clock/data high, clock high, data low while clock is high) - */ -static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir) -{ - int res; - - /* issue start sequence */ - sda_out(dd, i2c_line_high); - scl_out(dd, i2c_line_high); - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_low); - - /* issue length and direction byte */ - res = wr_byte(dd, offset_dir); - - if (res) - ipath_cdbg(VERBOSE, "No ack to complete start\n"); - - return res; -} - -/** - * stop_cmd - transmit the stop condition - * @dd: the infinipath device - * - * (both clock/data low, clock high, data high while clock is high) - */ -static void stop_cmd(struct ipath_devdata *dd) -{ - scl_out(dd, i2c_line_low); - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_high); - sda_out(dd, i2c_line_high); - udelay(2); -} - -/** - * eeprom_reset - reset I2C communication - * @dd: the infinipath device - */ - -static int eeprom_reset(struct ipath_devdata *dd) -{ - int clock_cycles_left = 9; - u64 *gpioval = &dd->ipath_gpio_out; - int ret; - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - /* Make sure shadows are consistent */ - dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); - *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " - "is %llx\n", (unsigned long long) *gpioval); - - /* - * This is to get the i2c into a known state, by first going low, - * then tristate sda (and then tristate scl as first thing - * in loop) - */ - scl_out(dd, i2c_line_low); - sda_out(dd, i2c_line_high); - - /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */ - while (clock_cycles_left--) { - scl_out(dd, i2c_line_high); - - /* SDA seen high, issue START by dropping it while SCL high */ - if (sda_in(dd, 0)) { - sda_out(dd, i2c_line_low); - scl_out(dd, i2c_line_low); - /* ATMEL spec says must be followed by STOP. */ - scl_out(dd, i2c_line_high); - sda_out(dd, i2c_line_high); - ret = 0; - goto bail; - } - - scl_out(dd, i2c_line_low); - } - - ret = 1; - -bail: - return ret; -} - -/* - * Probe for I2C device at specified address. Returns 0 for "success" - * to match rest of this file. - * Leave bus in "reasonable" state for further commands. - */ -static int i2c_probe(struct ipath_devdata *dd, int devaddr) -{ - int ret = 0; - - ret = eeprom_reset(dd); - if (ret) { - ipath_dev_err(dd, "Failed reset probing device 0x%02X\n", - devaddr); - return ret; - } - /* - * Reset no longer leaves bus in start condition, so normal - * i2c_startcmd() will do. - */ - ret = i2c_startcmd(dd, devaddr | READ_CMD); - if (ret) - ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n", - devaddr); - else { - /* - * Device did respond. Complete a single-byte read, because some - * devices apparently cannot handle STOP immediately after they - * ACK the start-cmd. - */ - int data; - data = rd_byte(dd); - stop_cmd(dd); - ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr); - } - return ret; -} - -/* - * Returns the "i2c type". This is a pointer to a struct that describes - * the I2C chain on this board. To minimize impact on struct ipath_devdata, - * the (small integer) index into the table is actually memoized, rather - * then the pointer. - * Memoization is because the type is determined on the first call per chip. - * An alternative would be to move type determination to early - * init code. - */ -static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd) -{ - int idx; - - /* Get memoized index, from previous successful probes */ - idx = dd->ipath_i2c_chain_type - 1; - if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1)) - goto done; - - idx = 0; - while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) { - /* if probe succeeds, this is type */ - if (!i2c_probe(dd, i2c_chains[idx].probe_dev)) - break; - ++idx; - } - - /* - * Old EEPROM (first entry) may require a reset after probe, - * rather than being able to "start" after "stop" - */ - if (idx == 0) - eeprom_reset(dd); - - if (i2c_chains[idx].probe_dev == IPATH_NO_DEV) - idx = -1; - else - dd->ipath_i2c_chain_type = idx + 1; -done: - return (idx >= 0) ? i2c_chains + idx : NULL; -} - -static int ipath_eeprom_internal_read(struct ipath_devdata *dd, - u8 eeprom_offset, void *buffer, int len) -{ - int ret; - struct i2c_chain_desc *icd; - u8 *bp = buffer; - - ret = 1; - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - if (icd->eeprom_dev == IPATH_NO_DEV) { - /* legacy not-really-I2C */ - ipath_cdbg(VERBOSE, "Start command only address\n"); - eeprom_offset = (eeprom_offset << 1) | READ_CMD; - ret = i2c_startcmd(dd, eeprom_offset); - } else { - /* Actual I2C */ - ipath_cdbg(VERBOSE, "Start command uses devaddr\n"); - if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) { - ipath_dbg("Failed EEPROM startcmd\n"); - stop_cmd(dd); - ret = 1; - goto bail; - } - ret = wr_byte(dd, eeprom_offset); - stop_cmd(dd); - if (ret) { - ipath_dev_err(dd, "Failed to write EEPROM address\n"); - ret = 1; - goto bail; - } - ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD); - } - if (ret) { - ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev); - stop_cmd(dd); - ret = 1; - goto bail; - } - - /* - * eeprom keeps clocking data out as long as we ack, automatically - * incrementing the address. - */ - while (len-- > 0) { - /* get and store data */ - *bp++ = rd_byte(dd); - /* send ack if not the last byte */ - if (len) - send_ack(dd); - } - - stop_cmd(dd); - - ret = 0; - -bail: - return ret; -} - -static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) -{ - int sub_len; - const u8 *bp = buffer; - int max_wait_time, i; - int ret; - struct i2c_chain_desc *icd; - - ret = 1; - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - while (len > 0) { - if (icd->eeprom_dev == IPATH_NO_DEV) { - if (i2c_startcmd(dd, - (eeprom_offset << 1) | WRITE_CMD)) { - ipath_dbg("Failed to start cmd offset %u\n", - eeprom_offset); - goto failed_write; - } - } else { - /* Real I2C */ - if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) { - ipath_dbg("Failed EEPROM startcmd\n"); - goto failed_write; - } - ret = wr_byte(dd, eeprom_offset); - if (ret) { - ipath_dev_err(dd, "Failed to write EEPROM " - "address\n"); - goto failed_write; - } - } - - sub_len = min(len, 4); - eeprom_offset += sub_len; - len -= sub_len; - - for (i = 0; i < sub_len; i++) { - if (wr_byte(dd, *bp++)) { - ipath_dbg("no ack after byte %u/%u (%u " - "total remain)\n", i, sub_len, - len + sub_len - i); - goto failed_write; - } - } - - stop_cmd(dd); - - /* - * wait for write complete by waiting for a successful - * read (the chip replies with a zero after the write - * cmd completes, and before it writes to the eeprom. - * The startcmd for the read will fail the ack until - * the writes have completed. We do this inline to avoid - * the debug prints that are in the real read routine - * if the startcmd fails. - * We also use the proper device address, so it doesn't matter - * whether we have real eeprom_dev. legacy likes any address. - */ - max_wait_time = 100; - while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) { - stop_cmd(dd); - if (!--max_wait_time) { - ipath_dbg("Did not get successful read to " - "complete write\n"); - goto failed_write; - } - } - /* now read (and ignore) the resulting byte */ - rd_byte(dd); - stop_cmd(dd); - } - - ret = 0; - goto bail; - -failed_write: - stop_cmd(dd); - ret = 1; - -bail: - return ret; -} - -/** - * ipath_eeprom_read - receives bytes from the eeprom via I2C - * @dd: the infinipath device - * @eeprom_offset: address to read from - * @buffer: where to store result - * @len: number of bytes to receive - */ -int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, - void *buff, int len) -{ - int ret; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); - mutex_unlock(&dd->ipath_eep_lock); - } - - return ret; -} - -/** - * ipath_eeprom_write - writes data to the eeprom via I2C - * @dd: the infinipath device - * @eeprom_offset: where to place data - * @buffer: data to write - * @len: number of bytes to write - */ -int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buff, int len) -{ - int ret; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); - mutex_unlock(&dd->ipath_eep_lock); - } - - return ret; -} - -static u8 flash_csum(struct ipath_flash *ifp, int adjust) -{ - u8 *ip = (u8 *) ifp; - u8 csum = 0, len; - - /* - * Limit length checksummed to max length of actual data. - * Checksum of erased eeprom will still be bad, but we avoid - * reading past the end of the buffer we were passed. - */ - len = ifp->if_length; - if (len > sizeof(struct ipath_flash)) - len = sizeof(struct ipath_flash); - while (len--) - csum += *ip++; - csum -= ifp->if_csum; - csum = ~csum; - if (adjust) - ifp->if_csum = csum; - - return csum; -} - -/** - * ipath_get_guid - get the GUID from the i2c device - * @dd: the infinipath device - * - * We have the capability to use the ipath_nguid field, and get - * the guid from the first chip's flash, to use for all of them. - */ -void ipath_get_eeprom_info(struct ipath_devdata *dd) -{ - void *buf; - struct ipath_flash *ifp; - __be64 guid; - int len, eep_stat; - u8 csum, *bguid; - int t = dd->ipath_unit; - struct ipath_devdata *dd0 = ipath_lookup(0); - - if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { - u8 oguid; - dd->ipath_guid = dd0->ipath_guid; - bguid = (u8 *) & dd->ipath_guid; - - oguid = bguid[7]; - bguid[7] += t; - if (oguid > bguid[7]) { - if (bguid[6] == 0xff) { - if (bguid[5] == 0xff) { - ipath_dev_err( - dd, - "Can't set %s GUID from " - "base, wraps to OUI!\n", - ipath_get_unit_name(t)); - dd->ipath_guid = 0; - goto bail; - } - bguid[5]++; - } - bguid[6]++; - } - dd->ipath_nguid = 1; - - ipath_dbg("nguid %u, so adding %u to device 0 guid, " - "for %llx\n", - dd0->ipath_nguid, t, - (unsigned long long) be64_to_cpu(dd->ipath_guid)); - goto bail; - } - - /* - * read full flash, not just currently used part, since it may have - * been written with a newer definition - * */ - len = sizeof(struct ipath_flash); - buf = vmalloc(len); - if (!buf) { - ipath_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for GUID\n", len); - goto bail; - } - - mutex_lock(&dd->ipath_eep_lock); - eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); - mutex_unlock(&dd->ipath_eep_lock); - - if (eep_stat) { - ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); - goto done; - } - ifp = (struct ipath_flash *)buf; - - csum = flash_csum(ifp, 0); - if (csum != ifp->if_csum) { - dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: " - "0x%x, not 0x%x\n", csum, ifp->if_csum); - goto done; - } - if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || - *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { - ipath_dev_err(dd, "Invalid GUID %llx from flash; " - "ignoring\n", - *(unsigned long long *) ifp->if_guid); - /* don't allow GUID if all 0 or all 1's */ - goto done; - } - - /* complain, but allow it */ - if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) - dev_info(&dd->pcidev->dev, "Warning, GUID %llx is " - "default, probably not correct!\n", - *(unsigned long long *) ifp->if_guid); - - bguid = ifp->if_guid; - if (!bguid[0] && !bguid[1] && !bguid[2]) { - /* original incorrect GUID format in flash; fix in - * core copy, by shifting up 2 octets; don't need to - * change top octet, since both it and shifted are - * 0.. */ - bguid[1] = bguid[3]; - bguid[2] = bguid[4]; - bguid[3] = bguid[4] = 0; - guid = *(__be64 *) ifp->if_guid; - ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, " - "shifting 2 octets\n"); - } else - guid = *(__be64 *) ifp->if_guid; - dd->ipath_guid = guid; - dd->ipath_nguid = ifp->if_numguid; - /* - * Things are slightly complicated by the desire to transparently - * support both the Pathscale 10-digit serial number and the QLogic - * 13-character version. - */ - if ((ifp->if_fversion > 1) && ifp->if_sprefix[0] - && ((u8 *)ifp->if_sprefix)[0] != 0xFF) { - /* This board has a Serial-prefix, which is stored - * elsewhere for backward-compatibility. - */ - char *snp = dd->ipath_serial; - memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); - snp[sizeof ifp->if_sprefix] = '\0'; - len = strlen(snp); - snp += len; - len = (sizeof dd->ipath_serial) - len; - if (len > sizeof ifp->if_serial) { - len = sizeof ifp->if_serial; - } - memcpy(snp, ifp->if_serial, len); - } else - memcpy(dd->ipath_serial, ifp->if_serial, - sizeof ifp->if_serial); - if (!strstr(ifp->if_comment, "Tested successfully")) - ipath_dev_err(dd, "Board SN %s did not pass functional " - "test: %s\n", dd->ipath_serial, - ifp->if_comment); - - ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", - (unsigned long long) be64_to_cpu(dd->ipath_guid)); - - memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT); - /* - * Power-on (actually "active") hours are kept as little-endian value - * in EEPROM, but as seconds in a (possibly as small as 24-bit) - * atomic_t while running. - */ - atomic_set(&dd->ipath_active_time, 0); - dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); - -done: - vfree(buf); - -bail:; -} - -/** - * ipath_update_eeprom_log - copy active-time and error counters to eeprom - * @dd: the infinipath device - * - * Although the time is kept as seconds in the ipath_devdata struct, it is - * rounded to hours for re-write, as we have only 16 bits in EEPROM. - * First-cut code reads whole (expected) struct ipath_flash, modifies, - * re-writes. Future direction: read/write only what we need, assuming - * that the EEPROM had to have been "good enough" for driver init, and - * if not, we aren't making it worse. - * - */ - -int ipath_update_eeprom_log(struct ipath_devdata *dd) -{ - void *buf; - struct ipath_flash *ifp; - int len, hi_water; - uint32_t new_time, new_hrs; - u8 csum; - int ret, idx; - unsigned long flags; - - /* first, check if we actually need to do anything. */ - ret = 0; - for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { - if (dd->ipath_eep_st_new_errs[idx]) { - ret = 1; - break; - } - } - new_time = atomic_read(&dd->ipath_active_time); - - if (ret == 0 && new_time < 3600) - return 0; - - /* - * The quick-check above determined that there is something worthy - * of logging, so get current contents and do a more detailed idea. - * read full flash, not just currently used part, since it may have - * been written with a newer definition - */ - len = sizeof(struct ipath_flash); - buf = vmalloc(len); - ret = 1; - if (!buf) { - ipath_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for logging\n", len); - goto bail; - } - - /* Grab semaphore and read current EEPROM. If we get an - * error, let go, but if not, keep it until we finish write. - */ - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (ret) { - ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); - goto free_bail; - } - ret = ipath_eeprom_internal_read(dd, 0, buf, len); - if (ret) { - mutex_unlock(&dd->ipath_eep_lock); - ipath_dev_err(dd, "Unable read EEPROM for logging\n"); - goto free_bail; - } - ifp = (struct ipath_flash *)buf; - - csum = flash_csum(ifp, 0); - if (csum != ifp->if_csum) { - mutex_unlock(&dd->ipath_eep_lock); - ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", - csum, ifp->if_csum); - ret = 1; - goto free_bail; - } - hi_water = 0; - spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); - for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { - int new_val = dd->ipath_eep_st_new_errs[idx]; - if (new_val) { - /* - * If we have seen any errors, add to EEPROM values - * We need to saturate at 0xFF (255) and we also - * would need to adjust the checksum if we were - * trying to minimize EEPROM traffic - * Note that we add to actual current count in EEPROM, - * in case it was altered while we were running. - */ - new_val += ifp->if_errcntp[idx]; - if (new_val > 0xFF) - new_val = 0xFF; - if (ifp->if_errcntp[idx] != new_val) { - ifp->if_errcntp[idx] = new_val; - hi_water = offsetof(struct ipath_flash, - if_errcntp) + idx; - } - /* - * update our shadow (used to minimize EEPROM - * traffic), to match what we are about to write. - */ - dd->ipath_eep_st_errs[idx] = new_val; - dd->ipath_eep_st_new_errs[idx] = 0; - } - } - /* - * now update active-time. We would like to round to the nearest hour - * but unless atomic_t are sure to be proper signed ints we cannot, - * because we need to account for what we "transfer" to EEPROM and - * if we log an hour at 31 minutes, then we would need to set - * active_time to -29 to accurately count the _next_ hour. - */ - if (new_time >= 3600) { - new_hrs = new_time / 3600; - atomic_sub((new_hrs * 3600), &dd->ipath_active_time); - new_hrs += dd->ipath_eep_hrs; - if (new_hrs > 0xFFFF) - new_hrs = 0xFFFF; - dd->ipath_eep_hrs = new_hrs; - if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { - ifp->if_powerhour[0] = new_hrs & 0xFF; - hi_water = offsetof(struct ipath_flash, if_powerhour); - } - if ((new_hrs >> 8) != ifp->if_powerhour[1]) { - ifp->if_powerhour[1] = new_hrs >> 8; - hi_water = offsetof(struct ipath_flash, if_powerhour) - + 1; - } - } - /* - * There is a tiny possibility that we could somehow fail to write - * the EEPROM after updating our shadows, but problems from holding - * the spinlock too long are a much bigger issue. - */ - spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); - if (hi_water) { - /* we made some change to the data, uopdate cksum and write */ - csum = flash_csum(ifp, 1); - ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); - } - mutex_unlock(&dd->ipath_eep_lock); - if (ret) - ipath_dev_err(dd, "Failed updating EEPROM\n"); - -free_bail: - vfree(buf); -bail: - return ret; - -} - -/** - * ipath_inc_eeprom_err - increment one of the four error counters - * that are logged to EEPROM. - * @dd: the infinipath device - * @eidx: 0..3, the counter to increment - * @incr: how much to add - * - * Each counter is 8-bits, and saturates at 255 (0xFF). They - * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log() - * is called, but it can only be called in a context that allows sleep. - * This function can be called even at interrupt level. - */ - -void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr) -{ - uint new_val; - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); - new_val = dd->ipath_eep_st_new_errs[eidx] + incr; - if (new_val > 255) - new_val = 255; - dd->ipath_eep_st_new_errs[eidx] = new_val; - spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); - return; -} - -static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum) -{ - int ret; - struct i2c_chain_desc *icd; - - ret = -ENOENT; - - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - if (icd->temp_dev == IPATH_NO_DEV) { - /* tempsense only exists on new, real-I2C boards */ - ret = -ENXIO; - goto bail; - } - - if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) { - ipath_dbg("Failed tempsense startcmd\n"); - stop_cmd(dd); - ret = -ENXIO; - goto bail; - } - ret = wr_byte(dd, regnum); - stop_cmd(dd); - if (ret) { - ipath_dev_err(dd, "Failed tempsense WR command %02X\n", - regnum); - ret = -ENXIO; - goto bail; - } - if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) { - ipath_dbg("Failed tempsense RD startcmd\n"); - stop_cmd(dd); - ret = -ENXIO; - goto bail; - } - /* - * We can only clock out one byte per command, sensibly - */ - ret = rd_byte(dd); - stop_cmd(dd); - -bail: - return ret; -} - -#define VALID_TS_RD_REG_MASK 0xBF - -/** - * ipath_tempsense_read - read register of temp sensor via I2C - * @dd: the infinipath device - * @regnum: register to read from - * - * returns reg contents (0..255) or < 0 for error - */ -int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum) -{ - int ret; - - if (regnum > 7) - return -EINVAL; - - /* return a bogus value for (the one) register we do not have */ - if (!((1 << regnum) & VALID_TS_RD_REG_MASK)) - return 0; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_tempsense_internal_read(dd, regnum); - mutex_unlock(&dd->ipath_eep_lock); - } - - /* - * There are three possibilities here: - * ret is actual value (0..255) - * ret is -ENXIO or -EINVAL from code in this file - * ret is -EINTR from mutex_lock_interruptible. - */ - return ret; -} - -static int ipath_tempsense_internal_write(struct ipath_devdata *dd, - u8 regnum, u8 data) -{ - int ret = -ENOENT; - struct i2c_chain_desc *icd; - - icd = ipath_i2c_type(dd); - if (!icd) - goto bail; - - if (icd->temp_dev == IPATH_NO_DEV) { - /* tempsense only exists on new, real-I2C boards */ - ret = -ENXIO; - goto bail; - } - if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) { - ipath_dbg("Failed tempsense startcmd\n"); - stop_cmd(dd); - ret = -ENXIO; - goto bail; - } - ret = wr_byte(dd, regnum); - if (ret) { - stop_cmd(dd); - ipath_dev_err(dd, "Failed to write tempsense command %02X\n", - regnum); - ret = -ENXIO; - goto bail; - } - ret = wr_byte(dd, data); - stop_cmd(dd); - ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD); - if (ret) { - ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n", - regnum); - ret = -ENXIO; - } - -bail: - return ret; -} - -#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD)) - -/** - * ipath_tempsense_write - write register of temp sensor via I2C - * @dd: the infinipath device - * @regnum: register to write - * @data: data to write - * - * returns 0 for success or < 0 for error - */ -int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data) -{ - int ret; - - if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK)) - return -EINVAL; - - ret = mutex_lock_interruptible(&dd->ipath_eep_lock); - if (!ret) { - ret = ipath_tempsense_internal_write(dd, regnum, data); - mutex_unlock(&dd->ipath_eep_lock); - } - - /* - * There are three possibilities here: - * ret is 0 for success - * ret is -ENXIO or -EINVAL from code in this file - * ret is -EINTR from mutex_lock_interruptible. - */ - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_file_ops.c b/kernel/drivers/infiniband/hw/ipath/ipath_file_ops.c deleted file mode 100644 index 450d15965..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ /dev/null @@ -1,2620 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/pci.h> -#include <linux/poll.h> -#include <linux/cdev.h> -#include <linux/swap.h> -#include <linux/export.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <linux/io.h> -#include <linux/jiffies.h> -#include <linux/cpu.h> -#include <linux/uio.h> -#include <asm/pgtable.h> - -#include "ipath_kernel.h" -#include "ipath_common.h" -#include "ipath_user_sdma.h" - -static int ipath_open(struct inode *, struct file *); -static int ipath_close(struct inode *, struct file *); -static ssize_t ipath_write(struct file *, const char __user *, size_t, - loff_t *); -static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from); -static unsigned int ipath_poll(struct file *, struct poll_table_struct *); -static int ipath_mmap(struct file *, struct vm_area_struct *); - -/* - * This is really, really weird shit - write() and writev() here - * have completely unrelated semantics. Sucky userland ABI, - * film at 11. - */ -static const struct file_operations ipath_file_ops = { - .owner = THIS_MODULE, - .write = ipath_write, - .write_iter = ipath_write_iter, - .open = ipath_open, - .release = ipath_close, - .poll = ipath_poll, - .mmap = ipath_mmap, - .llseek = noop_llseek, -}; - -/* - * Convert kernel virtual addresses to physical addresses so they don't - * potentially conflict with the chip addresses used as mmap offsets. - * It doesn't really matter what mmap offset we use as long as we can - * interpret it correctly. - */ -static u64 cvt_kvaddr(void *p) -{ - struct page *page; - u64 paddr = 0; - - page = vmalloc_to_page(p); - if (page) - paddr = page_to_pfn(page) << PAGE_SHIFT; - - return paddr; -} - -static int ipath_get_base_info(struct file *fp, - void __user *ubase, size_t ubase_size) -{ - struct ipath_portdata *pd = port_fp(fp); - int ret = 0; - struct ipath_base_info *kinfo = NULL; - struct ipath_devdata *dd = pd->port_dd; - unsigned subport_cnt; - int shared, master; - size_t sz; - - subport_cnt = pd->port_subport_cnt; - if (!subport_cnt) { - shared = 0; - master = 0; - subport_cnt = 1; - } else { - shared = 1; - master = !subport_fp(fp); - } - - sz = sizeof(*kinfo); - /* If port sharing is not requested, allow the old size structure */ - if (!shared) - sz -= 7 * sizeof(u64); - if (ubase_size < sz) { - ipath_cdbg(PROC, - "Base size %zu, need %zu (version mismatch?)\n", - ubase_size, sz); - ret = -EINVAL; - goto bail; - } - - kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); - if (kinfo == NULL) { - ret = -ENOMEM; - goto bail; - } - - ret = dd->ipath_f_get_base_info(pd, kinfo); - if (ret < 0) - goto bail; - - kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt; - kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize; - kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt; - kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize; - /* - * have to mmap whole thing - */ - kinfo->spi_rcv_egrbuftotlen = - pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; - kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; - kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / - pd->port_rcvegrbuf_chunks; - kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; - if (master) - kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; - /* - * for this use, may be ipath_cfgports summed over all chips that - * are are configured and present - */ - kinfo->spi_nports = dd->ipath_cfgports; - /* unit (chip/board) our port is on */ - kinfo->spi_unit = dd->ipath_unit; - /* for now, only a single page */ - kinfo->spi_tid_maxsize = PAGE_SIZE; - - /* - * Doing this per port, and based on the skip value, etc. This has - * to be the actual buffer size, since the protocol code treats it - * as an array. - * - * These have to be set to user addresses in the user code via mmap. - * These values are used on return to user code for the mmap target - * addresses only. For 32 bit, same 44 bit address problem, so use - * the physical address, not virtual. Before 2.6.11, using the - * page_address() macro worked, but in 2.6.11, even that returns the - * full 64 bit address (upper bits all 1's). So far, using the - * physical addresses (or chip offsets, for chip mapping) works, but - * no doubt some future kernel release will change that, and we'll be - * on to yet another method of dealing with this. - */ - kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; - kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; - kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; - kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; - kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + - (void *) dd->ipath_statusp - - (void *) dd->ipath_pioavailregs_dma; - if (!shared) { - kinfo->spi_piocnt = pd->port_piocnt; - kinfo->spi_piobufbase = (u64) pd->port_piobufs; - kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_ureg_align * pd->port_port; - } else if (master) { - kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) + - (pd->port_piocnt % subport_cnt); - /* Master's PIO buffers are after all the slave's */ - kinfo->spi_piobufbase = (u64) pd->port_piobufs + - dd->ipath_palign * - (pd->port_piocnt - kinfo->spi_piocnt); - } else { - unsigned slave = subport_fp(fp) - 1; - - kinfo->spi_piocnt = pd->port_piocnt / subport_cnt; - kinfo->spi_piobufbase = (u64) pd->port_piobufs + - dd->ipath_palign * kinfo->spi_piocnt * slave; - } - - if (shared) { - kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_ureg_align * pd->port_port; - kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; - kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; - kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; - - kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + - PAGE_SIZE * subport_fp(fp)); - - kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * subport_fp(fp)); - kinfo->spi_rcvhdr_tailaddr = 0; - kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + - pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * - subport_fp(fp)); - - kinfo->spi_subport_uregbase = - cvt_kvaddr(pd->subport_uregbase); - kinfo->spi_subport_rcvegrbuf = - cvt_kvaddr(pd->subport_rcvegrbuf); - kinfo->spi_subport_rcvhdr_base = - cvt_kvaddr(pd->subport_rcvhdr_base); - ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", - kinfo->spi_port, kinfo->spi_runtime_flags, - (unsigned long long) kinfo->spi_subport_uregbase, - (unsigned long long) kinfo->spi_subport_rcvegrbuf, - (unsigned long long) kinfo->spi_subport_rcvhdr_base); - } - - /* - * All user buffers are 2KB buffers. If we ever support - * giving 4KB buffers to user processes, this will need some - * work. - */ - kinfo->spi_pioindex = (kinfo->spi_piobufbase - - (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign; - kinfo->spi_pioalign = dd->ipath_palign; - - kinfo->spi_qpair = IPATH_KD_QP; - /* - * user mode PIO buffers are always 2KB, even when 4KB can - * be received, and sent via the kernel; this is ibmaxlen - * for 2K MTU. - */ - kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32); - kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ - kinfo->spi_port = pd->port_port; - kinfo->spi_subport = subport_fp(fp); - kinfo->spi_sw_version = IPATH_KERN_SWVERSION; - kinfo->spi_hw_version = dd->ipath_revision; - - if (master) { - kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; - } - - sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo); - if (copy_to_user(ubase, kinfo, sz)) - ret = -EFAULT; - -bail: - kfree(kinfo); - return ret; -} - -/** - * ipath_tid_update - update a port TID - * @pd: the port - * @fp: the ipath device file - * @ti: the TID information - * - * The new implementation as of Oct 2004 is that the driver assigns - * the tid and returns it to the caller. To make it easier to - * catch bugs, and to reduce search time, we keep a cursor for - * each port, walking the shadow tid array to find one that's not - * in use. - * - * For now, if we can't allocate the full list, we fail, although - * in the long run, we'll allocate as many as we can, and the - * caller will deal with that by trying the remaining pages later. - * That means that when we fail, we have to mark the tids as not in - * use again, in our shadow copy. - * - * It's up to the caller to free the tids when they are done. - * We'll unlock the pages as they free them. - * - * Also, right now we are locking one page at a time, but since - * the intended use of this routine is for a single group of - * virtually contiguous pages, that should change to improve - * performance. - */ -static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, - const struct ipath_tid_info *ti) -{ - int ret = 0, ntids; - u32 tid, porttid, cnt, i, tidcnt, tidoff; - u16 *tidlist; - struct ipath_devdata *dd = pd->port_dd; - u64 physaddr; - unsigned long vaddr; - u64 __iomem *tidbase; - unsigned long tidmap[8]; - struct page **pagep = NULL; - unsigned subport = subport_fp(fp); - - if (!dd->ipath_pageshadow) { - ret = -ENOMEM; - goto done; - } - - cnt = ti->tidcnt; - if (!cnt) { - ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n", - (unsigned long long) ti->tidlist); - /* - * Should we treat as success? likely a bug - */ - ret = -EFAULT; - goto done; - } - porttid = pd->port_port * dd->ipath_rcvtidcnt; - if (!pd->port_subport_cnt) { - tidcnt = dd->ipath_rcvtidcnt; - tid = pd->port_tidcursor; - tidoff = 0; - } else if (!subport) { - tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + - (dd->ipath_rcvtidcnt % pd->port_subport_cnt); - tidoff = dd->ipath_rcvtidcnt - tidcnt; - porttid += tidoff; - tid = tidcursor_fp(fp); - } else { - tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; - tidoff = tidcnt * (subport - 1); - porttid += tidoff; - tid = tidcursor_fp(fp); - } - if (cnt > tidcnt) { - /* make sure it all fits in port_tid_pg_list */ - dev_info(&dd->pcidev->dev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); - cnt = tidcnt; - } - pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; - tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; - - memset(tidmap, 0, sizeof(tidmap)); - /* before decrement; chip actual # */ - ntids = tidcnt; - tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - porttid * sizeof(*tidbase)); - - ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n", - pd->port_port, cnt, tid, tidbase); - - /* virtual address of first page in transfer */ - vaddr = ti->tidvaddr; - if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, - cnt * PAGE_SIZE)) { - ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n", - (void *)vaddr, cnt); - ret = -EFAULT; - goto done; - } - ret = ipath_get_user_pages(vaddr, cnt, pagep); - if (ret) { - if (ret == -EBUSY) { - ipath_dbg("Failed to lock addr %p, %u pages " - "(already locked)\n", - (void *) vaddr, cnt); - /* - * for now, continue, and see what happens but with - * the new implementation, this should never happen, - * unless perhaps the user has mpin'ed the pages - * themselves (something we need to test) - */ - ret = 0; - } else { - dev_info(&dd->pcidev->dev, - "Failed to lock addr %p, %u pages: " - "errno %d\n", (void *) vaddr, cnt, -ret); - goto done; - } - } - for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { - for (; ntids--; tid++) { - if (tid == tidcnt) - tid = 0; - if (!dd->ipath_pageshadow[porttid + tid]) - break; - } - if (ntids < 0) { - /* - * oops, wrapped all the way through their TIDs, - * and didn't have enough free; see comments at - * start of routine - */ - ipath_dbg("Not enough free TIDs for %u pages " - "(index %d), failing\n", cnt, i); - i--; /* last tidlist[i] not filled in */ - ret = -ENOMEM; - break; - } - tidlist[i] = tid + tidoff; - ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " - "vaddr %lx\n", i, tid + tidoff, vaddr); - /* we "know" system pages and TID pages are same size */ - dd->ipath_pageshadow[porttid + tid] = pagep[i]; - dd->ipath_physshadow[porttid + tid] = ipath_map_page( - dd->pcidev, pagep[i], 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - /* - * don't need atomic or it's overhead - */ - __set_bit(tid, tidmap); - physaddr = dd->ipath_physshadow[porttid + tid]; - ipath_stats.sps_pagelocks++; - ipath_cdbg(VERBOSE, - "TID %u, vaddr %lx, physaddr %llx pgp %p\n", - tid, vaddr, (unsigned long long) physaddr, - pagep[i]); - dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, - physaddr); - /* - * don't check this tid in ipath_portshadow, since we - * just filled it in; start with the next one. - */ - tid++; - } - - if (ret) { - u32 limit; - cleanup: - /* jump here if copy out of updated info failed... */ - ipath_dbg("After failure (ret=%d), undo %d of %d entries\n", - -ret, i, cnt); - /* same code that's in ipath_free_tid() */ - limit = sizeof(tidmap) * BITS_PER_BYTE; - if (limit > tidcnt) - /* just in case size changes in future */ - limit = tidcnt; - tid = find_first_bit((const unsigned long *)tidmap, limit); - for (; tid < limit; tid++) { - if (!test_bit(tid, tidmap)) - continue; - if (dd->ipath_pageshadow[porttid + tid]) { - ipath_cdbg(VERBOSE, "Freeing TID %u\n", - tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], - RCVHQ_RCV_TYPE_EXPECTED, - dd->ipath_tidinvalid); - pci_unmap_page(dd->pcidev, - dd->ipath_physshadow[porttid + tid], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - dd->ipath_pageshadow[porttid + tid] = NULL; - ipath_stats.sps_pageunlocks++; - } - } - ipath_release_user_pages(pagep, cnt); - } else { - /* - * Copy the updated array, with ipath_tid's filled in, back - * to user. Since we did the copy in already, this "should - * never fail" If it does, we have to clean up... - */ - if (copy_to_user((void __user *) - (unsigned long) ti->tidlist, - tidlist, cnt * sizeof(*tidlist))) { - ret = -EFAULT; - goto cleanup; - } - if (copy_to_user((void __user *) (unsigned long) ti->tidmap, - tidmap, sizeof tidmap)) { - ret = -EFAULT; - goto cleanup; - } - if (tid == tidcnt) - tid = 0; - if (!pd->port_subport_cnt) - pd->port_tidcursor = tid; - else - tidcursor_fp(fp) = tid; - } - -done: - if (ret) - ipath_dbg("Failed to map %u TID pages, failing with %d\n", - ti->tidcnt, -ret); - return ret; -} - -/** - * ipath_tid_free - free a port TID - * @pd: the port - * @subport: the subport - * @ti: the TID info - * - * right now we are unlocking one page at a time, but since - * the intended use of this routine is for a single group of - * virtually contiguous pages, that should change to improve - * performance. We check that the TID is in range for this port - * but otherwise don't check validity; if user has an error and - * frees the wrong tid, it's only their own data that can thereby - * be corrupted. We do check that the TID was in use, for sanity - * We always use our idea of the saved address, not the address that - * they pass in to us. - */ - -static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, - const struct ipath_tid_info *ti) -{ - int ret = 0; - u32 tid, porttid, cnt, limit, tidcnt; - struct ipath_devdata *dd = pd->port_dd; - u64 __iomem *tidbase; - unsigned long tidmap[8]; - - if (!dd->ipath_pageshadow) { - ret = -ENOMEM; - goto done; - } - - if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, - sizeof tidmap)) { - ret = -EFAULT; - goto done; - } - - porttid = pd->port_port * dd->ipath_rcvtidcnt; - if (!pd->port_subport_cnt) - tidcnt = dd->ipath_rcvtidcnt; - else if (!subport) { - tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + - (dd->ipath_rcvtidcnt % pd->port_subport_cnt); - porttid += dd->ipath_rcvtidcnt - tidcnt; - } else { - tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; - porttid += tidcnt * (subport - 1); - } - tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - porttid * sizeof(*tidbase)); - - limit = sizeof(tidmap) * BITS_PER_BYTE; - if (limit > tidcnt) - /* just in case size changes in future */ - limit = tidcnt; - tid = find_first_bit(tidmap, limit); - ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) " - "set is %d, porttid %u\n", pd->port_port, ti->tidcnt, - limit, tid, porttid); - for (cnt = 0; tid < limit; tid++) { - /* - * small optimization; if we detect a run of 3 or so without - * any set, use find_first_bit again. That's mainly to - * accelerate the case where we wrapped, so we have some at - * the beginning, and some at the end, and a big gap - * in the middle. - */ - if (!test_bit(tid, tidmap)) - continue; - cnt++; - if (dd->ipath_pageshadow[porttid + tid]) { - struct page *p; - p = dd->ipath_pageshadow[porttid + tid]; - dd->ipath_pageshadow[porttid + tid] = NULL; - ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", - pid_nr(pd->port_pid), tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], - RCVHQ_RCV_TYPE_EXPECTED, - dd->ipath_tidinvalid); - pci_unmap_page(dd->pcidev, - dd->ipath_physshadow[porttid + tid], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ipath_release_user_pages(&p, 1); - ipath_stats.sps_pageunlocks++; - } else - ipath_dbg("Unused tid %u, ignoring\n", tid); - } - if (cnt != ti->tidcnt) - ipath_dbg("passed in tidcnt %d, only %d bits set in map\n", - ti->tidcnt, cnt); -done: - if (ret) - ipath_dbg("Failed to unmap %u TID pages, failing with %d\n", - ti->tidcnt, -ret); - return ret; -} - -/** - * ipath_set_part_key - set a partition key - * @pd: the port - * @key: the key - * - * We can have up to 4 active at a time (other than the default, which is - * always allowed). This is somewhat tricky, since multiple ports may set - * the same key, so we reference count them, and clean up at exit. All 4 - * partition keys are packed into a single infinipath register. It's an - * error for a process to set the same pkey multiple times. We provide no - * mechanism to de-allocate a pkey at this time, we may eventually need to - * do that. I've used the atomic operations, and no locking, and only make - * a single pass through what's available. This should be more than - * adequate for some time. I'll think about spinlocks or the like if and as - * it's necessary. - */ -static int ipath_set_part_key(struct ipath_portdata *pd, u16 key) -{ - struct ipath_devdata *dd = pd->port_dd; - int i, any = 0, pidx = -1; - u16 lkey = key & 0x7FFF; - int ret; - - if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) { - /* nothing to do; this key always valid */ - ret = 0; - goto bail; - } - - ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys " - "%hx:%x %hx:%x %hx:%x %hx:%x\n", - pd->port_port, key, dd->ipath_pkeys[0], - atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1], - atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2], - atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3], - atomic_read(&dd->ipath_pkeyrefs[3])); - - if (!lkey) { - ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n", - pd->port_port); - ret = -EINVAL; - goto bail; - } - - /* - * Set the full membership bit, because it has to be - * set in the register or the packet, and it seems - * cleaner to set in the register than to force all - * callers to set it. (see bug 4331) - */ - key |= 0x8000; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - if (!pd->port_pkeys[i] && pidx == -1) - pidx = i; - if (pd->port_pkeys[i] == key) { - ipath_cdbg(VERBOSE, "p%u tries to set same pkey " - "(%x) more than once\n", - pd->port_port, key); - ret = -EEXIST; - goto bail; - } - } - if (pidx == -1) { - ipath_dbg("All pkeys for port %u already in use, " - "can't set %x\n", pd->port_port, key); - ret = -EBUSY; - goto bail; - } - for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - if (dd->ipath_pkeys[i] == key) { - atomic_t *pkrefs = &dd->ipath_pkeyrefs[i]; - - if (atomic_inc_return(pkrefs) > 1) { - pd->port_pkeys[pidx] = key; - ipath_cdbg(VERBOSE, "p%u set key %x " - "matches #%d, count now %d\n", - pd->port_port, key, i, - atomic_read(pkrefs)); - ret = 0; - goto bail; - } else { - /* - * lost race, decrement count, catch below - */ - atomic_dec(pkrefs); - ipath_cdbg(VERBOSE, "Lost race, count was " - "0, after dec, it's %d\n", - atomic_read(pkrefs)); - any++; - } - } - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - /* - * It makes no sense to have both the limited and - * full membership PKEY set at the same time since - * the unlimited one will disable the limited one. - */ - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ipath_dbg("port %u, all pkeys already in use, " - "can't set %x\n", pd->port_port, key); - ret = -EBUSY; - goto bail; - } - for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - u64 pkey; - - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key; - pkey = - (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(PROC, "p%u set key %x in #%d, " - "portidx %d, new pkey reg %llx\n", - pd->port_port, key, i, pidx, - (unsigned long long) pkey); - ipath_write_kreg( - dd, dd->ipath_kregs->kr_partitionkey, pkey); - - ret = 0; - goto bail; - } - } - ipath_dbg("port %u, all pkeys already in use 2nd pass, " - "can't set %x\n", pd->port_port, key); - ret = -EBUSY; - -bail: - return ret; -} - -/** - * ipath_manage_rcvq - manage a port's receive queue - * @pd: the port - * @subport: the subport - * @start_stop: action to carry out - * - * start_stop == 0 disables receive on the port, for use in queue - * overflow conditions. start_stop==1 re-enables, to be used to - * re-init the software copy of the head register - */ -static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, - int start_stop) -{ - struct ipath_devdata *dd = pd->port_dd; - - ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", - start_stop ? "en" : "dis", dd->ipath_unit, - pd->port_port, subport); - if (subport) - goto bail; - /* atomically clear receive enable port. */ - if (start_stop) { - /* - * On enable, force in-memory copy of the tail register to - * 0, so that protocol code doesn't have to worry about - * whether or not the chip has yet updated the in-memory - * copy or not on return from the system call. The chip - * always resets it's tail register back to 0 on a - * transition from disabled to enabled. This could cause a - * problem if software was broken, and did the enable w/o - * the disable, but eventually the in-memory copy will be - * updated and correct itself, even in the face of software - * bugs. - */ - if (pd->port_rcvhdrtail_kvaddr) - ipath_clear_rcvhdrtail(pd); - set_bit(dd->ipath_r_portenable_shift + pd->port_port, - &dd->ipath_rcvctrl); - } else - clear_bit(dd->ipath_r_portenable_shift + pd->port_port, - &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - /* now be sure chip saw it before we return */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - if (start_stop) { - /* - * And try to be sure that tail reg update has happened too. - * This should in theory interlock with the RXE changes to - * the tail register. Don't assign it to the tail register - * in memory copy, since we could overwrite an update by the - * chip if we did. - */ - ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); - } - /* always; new head should be equal to new tail; see above */ -bail: - return 0; -} - -static void ipath_clean_part_key(struct ipath_portdata *pd, - struct ipath_devdata *dd) -{ - int i, j, pchanged = 0; - u64 oldpkey; - - /* for debugging only */ - oldpkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - if (!pd->port_pkeys[i]) - continue; - ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i, - pd->port_pkeys[i]); - for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) { - /* check for match independent of the global bit */ - if ((dd->ipath_pkeys[j] & 0x7fff) != - (pd->port_pkeys[i] & 0x7fff)) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) { - ipath_cdbg(VERBOSE, "p%u clear key " - "%x matches #%d\n", - pd->port_port, - pd->port_pkeys[i], j); - ipath_stats.sps_pkeys[j] = - dd->ipath_pkeys[j] = 0; - pchanged++; - } - else ipath_cdbg( - VERBOSE, "p%u key %x matches #%d, " - "but ref still %d\n", pd->port_port, - pd->port_pkeys[i], j, - atomic_read(&dd->ipath_pkeyrefs[j])); - break; - } - pd->port_pkeys[i] = 0; - } - if (pchanged) { - u64 pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, " - "new pkey reg %llx\n", pd->port_port, - (unsigned long long) oldpkey, - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - } -} - -/* - * Initialize the port data with the receive buffer sizes - * so this can be done while the master port is locked. - * Otherwise, there is a race with a slave opening the port - * and seeing these fields uninitialized. - */ -static void init_user_egr_sizes(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned egrperchunk, egrcnt, size; - - /* - * to avoid wasting a lot of memory, we allocate 32KB chunks of - * physically contiguous memory, advance through it until used up - * and then allocate more. Of course, we need memory to store those - * extra pointers, now. Started out with 256KB, but under heavy - * memory pressure (creating large files and then copying them over - * NFS while doing lots of MPI jobs), we hit some allocation - * failures, even though we can sleep... (2.6.10) Still get - * failures at 64K. 32K is the lowest we can go without wasting - * additional memory. - */ - size = 0x8000; - egrperchunk = size / dd->ipath_rcvegrbufsize; - egrcnt = dd->ipath_rcvegrcnt; - pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; - pd->port_rcvegrbufs_perchunk = egrperchunk; - pd->port_rcvegrbuf_size = size; -} - -/** - * ipath_create_user_egr - allocate eager TID buffers - * @pd: the port to allocate TID buffers for - * - * This routine is now quite different for user and kernel, because - * the kernel uses skb's, for the accelerated network performance - * This is the user port version - * - * Allocate the eager TID buffers and program them into infinipath - * They are no longer completely contiguous, we do multiple allocation - * calls. - */ -static int ipath_create_user_egr(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; - size_t size; - int ret; - gfp_t gfp_flags; - - /* - * GFP_USER, but without GFP_FS, so buffer cache can be - * coalesced (we hope); otherwise, even at order 4, - * heavy filesystem activity makes these fail, and we can - * use compound pages. - */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; - - egrcnt = dd->ipath_rcvegrcnt; - /* TID number offset for this port */ - egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt; - egrsize = dd->ipath_rcvegrbufsize; - ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " - "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); - - chunk = pd->port_rcvegrbuf_chunks; - egrperchunk = pd->port_rcvegrbufs_perchunk; - size = pd->port_rcvegrbuf_size; - pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), - GFP_KERNEL); - if (!pd->port_rcvegrbuf) { - ret = -ENOMEM; - goto bail; - } - pd->port_rcvegrbuf_phys = - kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), - GFP_KERNEL); - if (!pd->port_rcvegrbuf_phys) { - ret = -ENOMEM; - goto bail_rcvegrbuf; - } - for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { - - pd->port_rcvegrbuf[e] = dma_alloc_coherent( - &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e], - gfp_flags); - - if (!pd->port_rcvegrbuf[e]) { - ret = -ENOMEM; - goto bail_rcvegrbuf_phys; - } - } - - pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0]; - - for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) { - dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk]; - unsigned i; - - for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { - dd->ipath_f_put_tid(dd, e + egroff + - (u64 __iomem *) - ((char __iomem *) - dd->ipath_kregbase + - dd->ipath_rcvegrbase), - RCVHQ_RCV_TYPE_EAGER, pa); - pa += egrsize; - } - cond_resched(); /* don't hog the cpu */ - } - - ret = 0; - goto bail; - -bail_rcvegrbuf_phys: - for (e = 0; e < pd->port_rcvegrbuf_chunks && - pd->port_rcvegrbuf[e]; e++) { - dma_free_coherent(&dd->pcidev->dev, size, - pd->port_rcvegrbuf[e], - pd->port_rcvegrbuf_phys[e]); - - } - kfree(pd->port_rcvegrbuf_phys); - pd->port_rcvegrbuf_phys = NULL; -bail_rcvegrbuf: - kfree(pd->port_rcvegrbuf); - pd->port_rcvegrbuf = NULL; -bail: - return ret; -} - - -/* common code for the mappings on dma_alloc_coherent mem */ -static int ipath_mmap_mem(struct vm_area_struct *vma, - struct ipath_portdata *pd, unsigned len, int write_ok, - void *kvaddr, char *what) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned long pfn; - int ret; - - if ((vma->vm_end - vma->vm_start) > len) { - dev_info(&dd->pcidev->dev, - "FAIL on %s: len %lx > %x\n", what, - vma->vm_end - vma->vm_start, len); - ret = -EFAULT; - goto bail; - } - - if (!write_ok) { - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "%s must be mapped readonly\n", what); - ret = -EPERM; - goto bail; - } - - /* don't allow them to later change with mprotect */ - vma->vm_flags &= ~VM_MAYWRITE; - } - - pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; - ret = remap_pfn_range(vma, vma->vm_start, pfn, - len, vma->vm_page_prot); - if (ret) - dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x " - "bytes r%c failed: %d\n", what, pd->port_port, - pfn, len, write_ok?'w':'o', ret); - else - ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes " - "r%c\n", what, pd->port_port, pfn, len, - write_ok?'w':'o'); -bail: - return ret; -} - -static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, - u64 ureg) -{ - unsigned long phys; - int ret; - - /* - * This is real hardware, so use io_remap. This is the mechanism - * for the user process to update the head registers for their port - * in the chip. - */ - if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); - ret = -EFAULT; - } else { - phys = dd->ipath_physaddr + ureg; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - ret = io_remap_pfn_range(vma, vma->vm_start, - phys >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); - } - return ret; -} - -static int mmap_piobufs(struct vm_area_struct *vma, - struct ipath_devdata *dd, - struct ipath_portdata *pd, - unsigned piobufs, unsigned piocnt) -{ - unsigned long phys; - int ret; - - /* - * When we map the PIO buffers in the chip, we want to map them as - * writeonly, no read possible. This prevents access to previous - * process data, and catches users who might try to read the i/o - * space due to a bug. - */ - if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { - dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", - vma->vm_end - vma->vm_start); - ret = -EINVAL; - goto bail; - } - - phys = dd->ipath_physaddr + piobufs; - -#if defined(__powerpc__) - /* There isn't a generic way to specify writethrough mappings */ - pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; - pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; - pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; -#endif - - /* - * don't allow them to later change to readable with mprotect (for when - * not initially mapped readable, as is normally the case) - */ - vma->vm_flags &= ~VM_MAYREAD; - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - - ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -bail: - return ret; -} - -static int mmap_rcvegrbufs(struct vm_area_struct *vma, - struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - unsigned long start, size; - size_t total_size, i; - unsigned long pfn; - int ret; - - size = pd->port_rcvegrbuf_size; - total_size = pd->port_rcvegrbuf_chunks * size; - if ((vma->vm_end - vma->vm_start) > total_size) { - dev_info(&dd->pcidev->dev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", - vma->vm_end - vma->vm_start, - (unsigned long) total_size); - ret = -EINVAL; - goto bail; - } - - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* don't allow them to later change to writeable with mprotect */ - vma->vm_flags &= ~VM_MAYWRITE; - - start = vma->vm_start; - - for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { - pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT; - ret = remap_pfn_range(vma, start, pfn, size, - vma->vm_page_prot); - if (ret < 0) - goto bail; - } - ret = 0; - -bail: - return ret; -} - -/* - * ipath_file_vma_fault - handle a VMA page fault. - */ -static int ipath_file_vma_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct page *page; - - page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); - if (!page) - return VM_FAULT_SIGBUS; - get_page(page); - vmf->page = page; - - return 0; -} - -static const struct vm_operations_struct ipath_file_vm_ops = { - .fault = ipath_file_vma_fault, -}; - -static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, - struct ipath_portdata *pd, unsigned subport) -{ - unsigned long len; - struct ipath_devdata *dd; - void *addr; - size_t size; - int ret = 0; - - /* If the port is not shared, all addresses should be physical */ - if (!pd->port_subport_cnt) - goto bail; - - dd = pd->port_dd; - size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; - - /* - * Each process has all the subport uregbase, rcvhdrq, and - * rcvegrbufs mmapped - as an array for all the processes, - * and also separately for this process. - */ - if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { - addr = pd->subport_uregbase; - size = PAGE_SIZE * pd->port_subport_cnt; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { - addr = pd->subport_rcvhdr_base; - size = pd->port_rcvhdrq_size * pd->port_subport_cnt; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { - addr = pd->subport_rcvegrbuf; - size *= pd->port_subport_cnt; - } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + - PAGE_SIZE * subport)) { - addr = pd->subport_uregbase + PAGE_SIZE * subport; - size = PAGE_SIZE; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * subport)) { - addr = pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * subport; - size = pd->port_rcvhdrq_size; - } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + - size * subport)) { - addr = pd->subport_rcvegrbuf + size * subport; - /* rcvegrbufs are read-only on the slave */ - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* - * Don't allow permission to later change to writeable - * with mprotect. - */ - vma->vm_flags &= ~VM_MAYWRITE; - } else { - goto bail; - } - len = vma->vm_end - vma->vm_start; - if (len > size) { - ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); - ret = -EINVAL; - goto bail; - } - - vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; - vma->vm_ops = &ipath_file_vm_ops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - ret = 1; - -bail: - return ret; -} - -/** - * ipath_mmap - mmap various structures into user space - * @fp: the file pointer - * @vma: the VM area - * - * We use this to have a shared buffer between the kernel and the user code - * for the rcvhdr queue, egr buffers, and the per-port user regs and pio - * buffers in the chip. We have the open and close entries so we can bump - * the ref count and keep the driver from being unloaded while still mapped. - */ -static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct ipath_portdata *pd; - struct ipath_devdata *dd; - u64 pgaddr, ureg; - unsigned piobufs, piocnt; - int ret; - - pd = port_fp(fp); - if (!pd) { - ret = -EINVAL; - goto bail; - } - dd = pd->port_dd; - - /* - * This is the ipath_do_user_init() code, mapping the shared buffers - * into the user process. The address referred to by vm_pgoff is the - * file offset passed via mmap(). For shared ports, this is the - * kernel vmalloc() address of the pages to share with the master. - * For non-shared or master ports, this is a physical address. - * We only do one mmap for each space mapped. - */ - pgaddr = vma->vm_pgoff << PAGE_SHIFT; - - /* - * Check for 0 in case one of the allocations failed, but user - * called mmap anyway. - */ - if (!pgaddr) { - ret = -EINVAL; - goto bail; - } - - ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", - (unsigned long long) pgaddr, vma->vm_start, - vma->vm_end - vma->vm_start, dd->ipath_unit, - pd->port_port, subport_fp(fp)); - - /* - * Physical addresses must fit in 40 bits for our hardware. - * Check for kernel virtual addresses first, anything else must - * match a HW or memory address. - */ - ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); - if (ret) { - if (ret > 0) - ret = 0; - goto bail; - } - - ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; - if (!pd->port_subport_cnt) { - /* port is not shared */ - piocnt = pd->port_piocnt; - piobufs = pd->port_piobufs; - } else if (!subport_fp(fp)) { - /* caller is the master */ - piocnt = (pd->port_piocnt / pd->port_subport_cnt) + - (pd->port_piocnt % pd->port_subport_cnt); - piobufs = pd->port_piobufs + - dd->ipath_palign * (pd->port_piocnt - piocnt); - } else { - unsigned slave = subport_fp(fp) - 1; - - /* caller is a slave */ - piocnt = pd->port_piocnt / pd->port_subport_cnt; - piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; - } - - if (pgaddr == ureg) - ret = mmap_ureg(vma, dd, ureg); - else if (pgaddr == piobufs) - ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); - else if (pgaddr == dd->ipath_pioavailregs_phys) - /* in-memory copy of pioavail registers */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - (void *) dd->ipath_pioavailregs_dma, - "pioavail registers"); - else if (pgaddr == pd->port_rcvegr_phys) - ret = mmap_rcvegrbufs(vma, pd); - else if (pgaddr == (u64) pd->port_rcvhdrq_phys) - /* - * The rcvhdrq itself; readonly except on HT (so have - * to allow writable mapping), multiple pages, contiguous - * from an i/o perspective. - */ - ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, - pd->port_rcvhdrq, - "rcvhdrq"); - else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) - /* in-memory copy of rcvhdrq tail register */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - pd->port_rcvhdrtail_kvaddr, - "rcvhdrq tail"); - else - ret = -EINVAL; - - vma->vm_private_data = NULL; - - if (ret < 0) - dev_info(&dd->pcidev->dev, - "Failure %d on off %llx len %lx\n", - -ret, (unsigned long long)pgaddr, - vma->vm_end - vma->vm_start); -bail: - return ret; -} - -static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd) -{ - unsigned pollflag = 0; - - if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) && - pd->port_hdrqfull != pd->port_hdrqfull_poll) { - pollflag |= POLLIN | POLLRDNORM; - pd->port_hdrqfull_poll = pd->port_hdrqfull; - } - - return pollflag; -} - -static unsigned int ipath_poll_urgent(struct ipath_portdata *pd, - struct file *fp, - struct poll_table_struct *pt) -{ - unsigned pollflag = 0; - struct ipath_devdata *dd; - - dd = pd->port_dd; - - /* variable access in ipath_poll_hdrqfull() needs this */ - rmb(); - pollflag = ipath_poll_hdrqfull(pd); - - if (pd->port_urgent != pd->port_urgent_poll) { - pollflag |= POLLIN | POLLRDNORM; - pd->port_urgent_poll = pd->port_urgent; - } - - if (!pollflag) { - /* this saves a spin_lock/unlock in interrupt handler... */ - set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag); - /* flush waiting flag so don't miss an event... */ - wmb(); - poll_wait(fp, &pd->port_wait, pt); - } - - return pollflag; -} - -static unsigned int ipath_poll_next(struct ipath_portdata *pd, - struct file *fp, - struct poll_table_struct *pt) -{ - u32 head; - u32 tail; - unsigned pollflag = 0; - struct ipath_devdata *dd; - - dd = pd->port_dd; - - /* variable access in ipath_poll_hdrqfull() needs this */ - rmb(); - pollflag = ipath_poll_hdrqfull(pd); - - head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); - if (pd->port_rcvhdrtail_kvaddr) - tail = ipath_get_rcvhdrtail(pd); - else - tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); - - if (head != tail) - pollflag |= POLLIN | POLLRDNORM; - else { - /* this saves a spin_lock/unlock in interrupt handler */ - set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - /* flush waiting flag so we don't miss an event */ - wmb(); - - set_bit(pd->port_port + dd->ipath_r_intravail_shift, - &dd->ipath_rcvctrl); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ - ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off | head, - pd->port_port); - - poll_wait(fp, &pd->port_wait, pt); - } - - return pollflag; -} - -static unsigned int ipath_poll(struct file *fp, - struct poll_table_struct *pt) -{ - struct ipath_portdata *pd; - unsigned pollflag; - - pd = port_fp(fp); - if (!pd) - pollflag = 0; - else if (pd->poll_type & IPATH_POLL_TYPE_URGENT) - pollflag = ipath_poll_urgent(pd, fp, pt); - else - pollflag = ipath_poll_next(pd, fp, pt); - - return pollflag; -} - -static int ipath_supports_subports(int user_swmajor, int user_swminor) -{ - /* no subport implementation prior to software version 1.3 */ - return (user_swmajor > 1) || (user_swminor >= 3); -} - -static int ipath_compatible_subports(int user_swmajor, int user_swminor) -{ - /* this code is written long-hand for clarity */ - if (IPATH_USER_SWMAJOR != user_swmajor) { - /* no promise of compatibility if major mismatch */ - return 0; - } - if (IPATH_USER_SWMAJOR == 1) { - switch (IPATH_USER_SWMINOR) { - case 0: - case 1: - case 2: - /* no subport implementation so cannot be compatible */ - return 0; - case 3: - /* 3 is only compatible with itself */ - return user_swminor == 3; - default: - /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; - } - } - /* make no promises yet for future major versions */ - return 0; -} - -static int init_subports(struct ipath_devdata *dd, - struct ipath_portdata *pd, - const struct ipath_user_info *uinfo) -{ - int ret = 0; - unsigned num_subports; - size_t size; - - /* - * If the user is requesting zero subports, - * skip the subport allocation. - */ - if (uinfo->spu_subport_cnt <= 0) - goto bail; - - /* Self-consistency check for ipath_compatible_subports() */ - if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) && - !ipath_compatible_subports(IPATH_USER_SWMAJOR, - IPATH_USER_SWMINOR)) { - dev_info(&dd->pcidev->dev, - "Inconsistent ipath_compatible_subports()\n"); - goto bail; - } - - /* Check for subport compatibility */ - if (!ipath_compatible_subports(uinfo->spu_userversion >> 16, - uinfo->spu_userversion & 0xffff)) { - dev_info(&dd->pcidev->dev, - "Mismatched user version (%d.%d) and driver " - "version (%d.%d) while port sharing. Ensure " - "that driver and library are from the same " - "release.\n", - (int) (uinfo->spu_userversion >> 16), - (int) (uinfo->spu_userversion & 0xffff), - IPATH_USER_SWMAJOR, - IPATH_USER_SWMINOR); - goto bail; - } - if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { - ret = -EINVAL; - goto bail; - } - - num_subports = uinfo->spu_subport_cnt; - pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports); - if (!pd->subport_uregbase) { - ret = -ENOMEM; - goto bail; - } - /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ - size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE) * num_subports; - pd->subport_rcvhdr_base = vzalloc(size); - if (!pd->subport_rcvhdr_base) { - ret = -ENOMEM; - goto bail_ureg; - } - - pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks * - pd->port_rcvegrbuf_size * - num_subports); - if (!pd->subport_rcvegrbuf) { - ret = -ENOMEM; - goto bail_rhdr; - } - - pd->port_subport_cnt = uinfo->spu_subport_cnt; - pd->port_subport_id = uinfo->spu_subport_id; - pd->active_slaves = 1; - set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); - goto bail; - -bail_rhdr: - vfree(pd->subport_rcvhdr_base); -bail_ureg: - vfree(pd->subport_uregbase); - pd->subport_uregbase = NULL; -bail: - return ret; -} - -static int try_alloc_port(struct ipath_devdata *dd, int port, - struct file *fp, - const struct ipath_user_info *uinfo) -{ - struct ipath_portdata *pd; - int ret; - - if (!(pd = dd->ipath_pd[port])) { - void *ptmp; - - pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); - - /* - * Allocate memory for use in ipath_tid_update() just once - * at open, not per call. Reduces cost of expected send - * setup. - */ - ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + - dd->ipath_rcvtidcnt * sizeof(struct page **), - GFP_KERNEL); - if (!pd || !ptmp) { - ipath_dev_err(dd, "Unable to allocate portdata " - "memory, failing open\n"); - ret = -ENOMEM; - kfree(pd); - kfree(ptmp); - goto bail; - } - dd->ipath_pd[port] = pd; - dd->ipath_pd[port]->port_port = port; - dd->ipath_pd[port]->port_dd = dd; - dd->ipath_pd[port]->port_tid_pg_list = ptmp; - init_waitqueue_head(&dd->ipath_pd[port]->port_wait); - } - if (!pd->port_cnt) { - pd->userversion = uinfo->spu_userversion; - init_user_egr_sizes(pd); - if ((ret = init_subports(dd, pd, uinfo)) != 0) - goto bail; - ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", - current->comm, current->pid, dd->ipath_unit, - port); - pd->port_cnt = 1; - port_fp(fp) = pd; - pd->port_pid = get_pid(task_pid(current)); - strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); - ipath_stats.sps_ports++; - ret = 0; - } else - ret = -EBUSY; - -bail: - return ret; -} - -static inline int usable(struct ipath_devdata *dd) -{ - return dd && - (dd->ipath_flags & IPATH_PRESENT) && - dd->ipath_kregbase && - dd->ipath_lid && - !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED - | IPATH_LINKUNK)); -} - -static int find_free_port(int unit, struct file *fp, - const struct ipath_user_info *uinfo) -{ - struct ipath_devdata *dd = ipath_lookup(unit); - int ret, i; - - if (!dd) { - ret = -ENODEV; - goto bail; - } - - if (!usable(dd)) { - ret = -ENETDOWN; - goto bail; - } - - for (i = 1; i < dd->ipath_cfgports; i++) { - ret = try_alloc_port(dd, i, fp, uinfo); - if (ret != -EBUSY) - goto bail; - } - ret = -EBUSY; - -bail: - return ret; -} - -static int find_best_unit(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int ret = 0, i, prefunit = -1, devmax; - int maxofallports, npresent, nup; - int ndev; - - devmax = ipath_count_units(&npresent, &nup, &maxofallports); - - /* - * This code is present to allow a knowledgeable person to - * specify the layout of processes to processors before opening - * this driver, and then we'll assign the process to the "closest" - * InfiniPath chip to that processor (we assume reasonable connectivity, - * for now). This code assumes that if affinity has been set - * before this point, that at most one cpu is set; for now this - * is reasonable. I check for both cpumask_empty() and cpumask_full(), - * in case some kernel variant sets none of the bits when no - * affinity is set. 2.6.11 and 12 kernels have all present - * cpus set. Some day we'll have to fix it up further to handle - * a cpu subset. This algorithm fails for two HT chips connected - * in tunnel fashion. Eventually this needs real topology - * information. There may be some issues with dual core numbering - * as well. This needs more work prior to release. - */ - if (!cpumask_empty(tsk_cpus_allowed(current)) && - !cpumask_full(tsk_cpus_allowed(current))) { - int ncpus = num_online_cpus(), curcpu = -1, nset = 0; - get_online_cpus(); - for_each_online_cpu(i) - if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) { - ipath_cdbg(PROC, "%s[%u] affinity set for " - "cpu %d/%d\n", current->comm, - current->pid, i, ncpus); - curcpu = i; - nset++; - } - put_online_cpus(); - if (curcpu != -1 && nset != ncpus) { - if (npresent) { - prefunit = curcpu / (ncpus / npresent); - ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " - "%d cpus/chip, select unit %d\n", - current->comm, current->pid, - npresent, ncpus, ncpus / npresent, - prefunit); - } - } - } - - /* - * user ports start at 1, kernel port is 0 - * For now, we do round-robin access across all chips - */ - - if (prefunit != -1) - devmax = prefunit + 1; -recheck: - for (i = 1; i < maxofallports; i++) { - for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; - ndev++) { - struct ipath_devdata *dd = ipath_lookup(ndev); - - if (!usable(dd)) - continue; /* can't use this unit */ - if (i >= dd->ipath_cfgports) - /* - * Maxed out on users of this unit. Try - * next. - */ - continue; - ret = try_alloc_port(dd, i, fp, uinfo); - if (!ret) - goto done; - } - } - - if (npresent) { - if (nup == 0) { - ret = -ENETDOWN; - ipath_dbg("No ports available (none initialized " - "and ready)\n"); - } else { - if (prefunit > 0) { - /* if started above 0, retry from 0 */ - ipath_cdbg(PROC, - "%s[%u] no ports on prefunit " - "%d, clear and re-check\n", - current->comm, current->pid, - prefunit); - devmax = ipath_count_units(NULL, NULL, - NULL); - prefunit = -1; - goto recheck; - } - ret = -EBUSY; - ipath_dbg("No ports available\n"); - } - } else { - ret = -ENXIO; - ipath_dbg("No boards found\n"); - } - -done: - return ret; -} - -static int find_shared_port(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int devmax, ndev, i; - int ret = 0; - - devmax = ipath_count_units(NULL, NULL, NULL); - - for (ndev = 0; ndev < devmax; ndev++) { - struct ipath_devdata *dd = ipath_lookup(ndev); - - if (!usable(dd)) - continue; - for (i = 1; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - /* Skip ports which are not yet open */ - if (!pd || !pd->port_cnt) - continue; - /* Skip port if it doesn't match the requested one */ - if (pd->port_subport_id != uinfo->spu_subport_id) - continue; - /* Verify the sharing process matches the master */ - if (pd->port_subport_cnt != uinfo->spu_subport_cnt || - pd->userversion != uinfo->spu_userversion || - pd->port_cnt >= pd->port_subport_cnt) { - ret = -EINVAL; - goto done; - } - port_fp(fp) = pd; - subport_fp(fp) = pd->port_cnt++; - pd->port_subpid[subport_fp(fp)] = - get_pid(task_pid(current)); - tidcursor_fp(fp) = 0; - pd->active_slaves |= 1 << subport_fp(fp); - ipath_cdbg(PROC, - "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", - current->comm, current->pid, - subport_fp(fp), - pd->port_comm, pid_nr(pd->port_pid), - dd->ipath_unit, pd->port_port); - ret = 1; - goto done; - } - } - -done: - return ret; -} - -static int ipath_open(struct inode *in, struct file *fp) -{ - /* The real work is performed later in ipath_assign_port() */ - fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); - return fp->private_data ? 0 : -ENOMEM; -} - -/* Get port early, so can set affinity prior to memory allocation */ -static int ipath_assign_port(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int ret; - int i_minor; - unsigned swmajor, swminor; - - /* Check to be sure we haven't already initialized this file */ - if (port_fp(fp)) { - ret = -EINVAL; - goto done; - } - - /* for now, if major version is different, bail */ - swmajor = uinfo->spu_userversion >> 16; - if (swmajor != IPATH_USER_SWMAJOR) { - ipath_dbg("User major version %d not same as driver " - "major %d\n", uinfo->spu_userversion >> 16, - IPATH_USER_SWMAJOR); - ret = -ENODEV; - goto done; - } - - swminor = uinfo->spu_userversion & 0xffff; - if (swminor != IPATH_USER_SWMINOR) - ipath_dbg("User minor version %d not same as driver " - "minor %d\n", swminor, IPATH_USER_SWMINOR); - - mutex_lock(&ipath_mutex); - - if (ipath_compatible_subports(swmajor, swminor) && - uinfo->spu_subport_cnt && - (ret = find_shared_port(fp, uinfo))) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; - } - - i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE; - ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)file_inode(fp)->i_rdev, i_minor); - - if (i_minor) - ret = find_free_port(i_minor - 1, fp, uinfo); - else - ret = find_best_unit(fp, uinfo); - -done_chk_sdma: - if (!ret) { - struct ipath_filedata *fd = fp->private_data; - const struct ipath_portdata *pd = fd->pd; - const struct ipath_devdata *dd = pd->port_dd; - - fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev, - dd->ipath_unit, - pd->port_port, - fd->subport); - - if (!fd->pq) - ret = -ENOMEM; - } - - mutex_unlock(&ipath_mutex); - -done: - return ret; -} - - -static int ipath_do_user_init(struct file *fp, - const struct ipath_user_info *uinfo) -{ - int ret; - struct ipath_portdata *pd = port_fp(fp); - struct ipath_devdata *dd; - u32 head32; - - /* Subports don't need to initialize anything since master did it. */ - if (subport_fp(fp)) { - ret = wait_event_interruptible(pd->port_wait, - !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); - goto done; - } - - dd = pd->port_dd; - - if (uinfo->spu_rcvhdrsize) { - ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); - if (ret) - goto done; - } - - /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ - - /* some ports may get extra buffers, calculate that here */ - if (pd->port_port <= dd->ipath_ports_extrabuf) - pd->port_piocnt = dd->ipath_pbufsport + 1; - else - pd->port_piocnt = dd->ipath_pbufsport; - - /* for right now, kernel piobufs are at end, so port 1 is at 0 */ - if (pd->port_port <= dd->ipath_ports_extrabuf) - pd->port_pio_base = (dd->ipath_pbufsport + 1) - * (pd->port_port - 1); - else - pd->port_pio_base = dd->ipath_ports_extrabuf + - dd->ipath_pbufsport * (pd->port_port - 1); - pd->port_piobufs = dd->ipath_piobufbase + - pd->port_pio_base * dd->ipath_palign; - ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u," - " first pio %u\n", pd->port_port, pd->port_piobufs, - pd->port_piocnt, pd->port_pio_base); - ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0); - - /* - * Now allocate the rcvhdr Q and eager TIDs; skip the TID - * array for time being. If pd->port_port > chip-supported, - * we need to do extra stuff here to handle by handling overflow - * through port 0, someday - */ - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = ipath_create_user_egr(pd); - if (ret) - goto done; - - /* - * set the eager head register for this port to the current values - * of the tail pointers, since we don't know if they were - * updated on last use of the port. - */ - head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); - ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - pd->port_lastrcvhdrqtail = -1; - ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", - pd->port_port, head32); - pd->port_tidcursor = 0; /* start at beginning after open */ - - /* initialize poll variables... */ - pd->port_urgent = 0; - pd->port_urgent_poll = 0; - pd->port_hdrqfull_poll = pd->port_hdrqfull; - - /* - * Now enable the port for receive. - * For chips that are set to DMA the tail register to memory - * when they change (and when the update bit transitions from - * 0 to 1. So for those chips, we turn it off and then back on. - * This will (very briefly) affect any other open ports, but the - * duration is very short, and therefore isn't an issue. We - * explicitly set the in-memory tail copy to 0 beforehand, so we - * don't have to wait to be sure the DMA update has happened - * (chip resets head/tail to 0 on transition to enable). - */ - set_bit(dd->ipath_r_portenable_shift + pd->port_port, - &dd->ipath_rcvctrl); - if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { - if (pd->port_rcvhdrtail_kvaddr) - ipath_clear_rcvhdrtail(pd); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & - ~(1ULL << dd->ipath_r_tailupd_shift)); - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - /* Notify any waiting slaves */ - if (pd->port_subport_cnt) { - clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); - wake_up(&pd->port_wait); - } -done: - return ret; -} - -/** - * unlock_exptid - unlock any expected TID entries port still had in use - * @pd: port - * - * We don't actually update the chip here, because we do a bulk update - * below, using ipath_f_clear_tids. - */ -static void unlock_expected_tids(struct ipath_portdata *pd) -{ - struct ipath_devdata *dd = pd->port_dd; - int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt; - int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt; - - ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n", - pd->port_port); - for (i = port_tidbase; i < maxtid; i++) { - struct page *ps = dd->ipath_pageshadow[i]; - - if (!ps) - continue; - - dd->ipath_pageshadow[i] = NULL; - pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i], - PAGE_SIZE, PCI_DMA_FROMDEVICE); - ipath_release_user_pages_on_close(&ps, 1); - cnt++; - ipath_stats.sps_pageunlocks++; - } - if (cnt) - ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n", - pd->port_port, cnt); - - if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks) - ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n", - (unsigned long long) ipath_stats.sps_pagelocks, - (unsigned long long) - ipath_stats.sps_pageunlocks); -} - -static int ipath_close(struct inode *in, struct file *fp) -{ - int ret = 0; - struct ipath_filedata *fd; - struct ipath_portdata *pd; - struct ipath_devdata *dd; - unsigned long flags; - unsigned port; - struct pid *pid; - - ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", - (long)in->i_rdev, fp->private_data); - - mutex_lock(&ipath_mutex); - - fd = fp->private_data; - fp->private_data = NULL; - pd = fd->pd; - if (!pd) { - mutex_unlock(&ipath_mutex); - goto bail; - } - - dd = pd->port_dd; - - /* drain user sdma queue */ - ipath_user_sdma_queue_drain(dd, fd->pq); - ipath_user_sdma_queue_destroy(fd->pq); - - if (--pd->port_cnt) { - /* - * XXX If the master closes the port before the slave(s), - * revoke the mmap for the eager receive queue so - * the slave(s) don't wait for receive data forever. - */ - pd->active_slaves &= ~(1 << fd->subport); - put_pid(pd->port_subpid[fd->subport]); - pd->port_subpid[fd->subport] = NULL; - mutex_unlock(&ipath_mutex); - goto bail; - } - /* early; no interrupt users after this */ - spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); - port = pd->port_port; - dd->ipath_pd[port] = NULL; - pid = pd->port_pid; - pd->port_pid = NULL; - spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); - - if (pd->port_rcvwait_to || pd->port_piowait_to - || pd->port_rcvnowait || pd->port_pionowait) { - ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; " - "%u rcv %u, pio already\n", - pd->port_port, pd->port_rcvwait_to, - pd->port_piowait_to, pd->port_rcvnowait, - pd->port_pionowait); - pd->port_rcvwait_to = pd->port_piowait_to = - pd->port_rcvnowait = pd->port_pionowait = 0; - } - if (pd->port_flag) { - ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n", - pd->port_port, pd->port_flag); - pd->port_flag = 0; - } - - if (dd->ipath_kregbase) { - /* atomically clear receive enable port and intr avail. */ - clear_bit(dd->ipath_r_portenable_shift + port, - &dd->ipath_rcvctrl); - clear_bit(pd->port_port + dd->ipath_r_intravail_shift, - &dd->ipath_rcvctrl); - ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - /* and read back from chip to be sure that nothing - * else is in flight when we do the rest */ - (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - /* clean up the pkeys for this port user */ - ipath_clean_part_key(pd, dd); - /* - * be paranoid, and never write 0's to these, just use an - * unused part of the port 0 tail page. Of course, - * rcvhdraddr points to a large chunk of memory, so this - * could still trash things, but at least it won't trash - * page 0, and by disabling the port, it should stop "soon", - * even if a packet or two is in already in flight after we - * disabled the port. - */ - ipath_write_kreg_port(dd, - dd->ipath_kregs->kr_rcvhdrtailaddr, port, - dd->ipath_dummy_hdrq_phys); - ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, - pd->port_port, dd->ipath_dummy_hdrq_phys); - - ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt); - ipath_chg_pioavailkernel(dd, pd->port_pio_base, - pd->port_piocnt, 1); - - dd->ipath_f_clear_tids(dd, pd->port_port); - - if (dd->ipath_pageshadow) - unlock_expected_tids(pd); - ipath_stats.sps_ports--; - ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pid_nr(pid), - dd->ipath_unit, port); - } - - put_pid(pid); - mutex_unlock(&ipath_mutex); - ipath_free_pddata(dd, pd); /* after releasing the mutex */ - -bail: - kfree(fd); - return ret; -} - -static int ipath_port_info(struct ipath_portdata *pd, u16 subport, - struct ipath_port_info __user *uinfo) -{ - struct ipath_port_info info; - int nup; - int ret; - size_t sz; - - (void) ipath_count_units(NULL, &nup, NULL); - info.num_active = nup; - info.unit = pd->port_dd->ipath_unit; - info.port = pd->port_port; - info.subport = subport; - /* Don't return new fields if old library opened the port. */ - if (ipath_supports_subports(pd->userversion >> 16, - pd->userversion & 0xffff)) { - /* Number of user ports available for this device. */ - info.num_ports = pd->port_dd->ipath_cfgports - 1; - info.num_subports = pd->port_subport_cnt; - sz = sizeof(info); - } else - sz = sizeof(info) - 2 * sizeof(u16); - - if (copy_to_user(uinfo, &info, sz)) { - ret = -EFAULT; - goto bail; - } - ret = 0; - -bail: - return ret; -} - -static int ipath_get_slave_info(struct ipath_portdata *pd, - void __user *slave_mask_addr) -{ - int ret = 0; - - if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32))) - ret = -EFAULT; - return ret; -} - -static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq, - u32 __user *inflightp) -{ - const u32 val = ipath_user_sdma_inflight_counter(pq); - - if (put_user(val, inflightp)) - return -EFAULT; - - return 0; -} - -static int ipath_sdma_get_complete(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - u32 __user *completep) -{ - u32 val; - int err; - - err = ipath_user_sdma_make_progress(dd, pq); - if (err < 0) - return err; - - val = ipath_user_sdma_complete_counter(pq); - if (put_user(val, completep)) - return -EFAULT; - - return 0; -} - -static ssize_t ipath_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - const struct ipath_cmd __user *ucmd; - struct ipath_portdata *pd; - const void __user *src; - size_t consumed, copy; - struct ipath_cmd cmd; - ssize_t ret = 0; - void *dest; - - if (count < sizeof(cmd.type)) { - ret = -EINVAL; - goto bail; - } - - ucmd = (const struct ipath_cmd __user *) data; - - if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { - ret = -EFAULT; - goto bail; - } - - consumed = sizeof(cmd.type); - - switch (cmd.type) { - case IPATH_CMD_ASSIGN_PORT: - case __IPATH_CMD_USER_INIT: - case IPATH_CMD_USER_INIT: - copy = sizeof(cmd.cmd.user_info); - dest = &cmd.cmd.user_info; - src = &ucmd->cmd.user_info; - break; - case IPATH_CMD_RECV_CTRL: - copy = sizeof(cmd.cmd.recv_ctrl); - dest = &cmd.cmd.recv_ctrl; - src = &ucmd->cmd.recv_ctrl; - break; - case IPATH_CMD_PORT_INFO: - copy = sizeof(cmd.cmd.port_info); - dest = &cmd.cmd.port_info; - src = &ucmd->cmd.port_info; - break; - case IPATH_CMD_TID_UPDATE: - case IPATH_CMD_TID_FREE: - copy = sizeof(cmd.cmd.tid_info); - dest = &cmd.cmd.tid_info; - src = &ucmd->cmd.tid_info; - break; - case IPATH_CMD_SET_PART_KEY: - copy = sizeof(cmd.cmd.part_key); - dest = &cmd.cmd.part_key; - src = &ucmd->cmd.part_key; - break; - case __IPATH_CMD_SLAVE_INFO: - copy = sizeof(cmd.cmd.slave_mask_addr); - dest = &cmd.cmd.slave_mask_addr; - src = &ucmd->cmd.slave_mask_addr; - break; - case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg - copy = 0; - src = NULL; - dest = NULL; - break; - case IPATH_CMD_POLL_TYPE: - copy = sizeof(cmd.cmd.poll_type); - dest = &cmd.cmd.poll_type; - src = &ucmd->cmd.poll_type; - break; - case IPATH_CMD_ARMLAUNCH_CTRL: - copy = sizeof(cmd.cmd.armlaunch_ctrl); - dest = &cmd.cmd.armlaunch_ctrl; - src = &ucmd->cmd.armlaunch_ctrl; - break; - case IPATH_CMD_SDMA_INFLIGHT: - copy = sizeof(cmd.cmd.sdma_inflight); - dest = &cmd.cmd.sdma_inflight; - src = &ucmd->cmd.sdma_inflight; - break; - case IPATH_CMD_SDMA_COMPLETE: - copy = sizeof(cmd.cmd.sdma_complete); - dest = &cmd.cmd.sdma_complete; - src = &ucmd->cmd.sdma_complete; - break; - default: - ret = -EINVAL; - goto bail; - } - - if (copy) { - if ((count - consumed) < copy) { - ret = -EINVAL; - goto bail; - } - - if (copy_from_user(dest, src, copy)) { - ret = -EFAULT; - goto bail; - } - - consumed += copy; - } - - pd = port_fp(fp); - if (!pd && cmd.type != __IPATH_CMD_USER_INIT && - cmd.type != IPATH_CMD_ASSIGN_PORT) { - ret = -EINVAL; - goto bail; - } - - switch (cmd.type) { - case IPATH_CMD_ASSIGN_PORT: - ret = ipath_assign_port(fp, &cmd.cmd.user_info); - if (ret) - goto bail; - break; - case __IPATH_CMD_USER_INIT: - /* backwards compatibility, get port first */ - ret = ipath_assign_port(fp, &cmd.cmd.user_info); - if (ret) - goto bail; - /* and fall through to current version. */ - case IPATH_CMD_USER_INIT: - ret = ipath_do_user_init(fp, &cmd.cmd.user_info); - if (ret) - goto bail; - ret = ipath_get_base_info( - fp, (void __user *) (unsigned long) - cmd.cmd.user_info.spu_base_info, - cmd.cmd.user_info.spu_base_info_size); - break; - case IPATH_CMD_RECV_CTRL: - ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl); - break; - case IPATH_CMD_PORT_INFO: - ret = ipath_port_info(pd, subport_fp(fp), - (struct ipath_port_info __user *) - (unsigned long) cmd.cmd.port_info); - break; - case IPATH_CMD_TID_UPDATE: - ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info); - break; - case IPATH_CMD_TID_FREE: - ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info); - break; - case IPATH_CMD_SET_PART_KEY: - ret = ipath_set_part_key(pd, cmd.cmd.part_key); - break; - case __IPATH_CMD_SLAVE_INFO: - ret = ipath_get_slave_info(pd, - (void __user *) (unsigned long) - cmd.cmd.slave_mask_addr); - break; - case IPATH_CMD_PIOAVAILUPD: - ipath_force_pio_avail_update(pd->port_dd); - break; - case IPATH_CMD_POLL_TYPE: - pd->poll_type = cmd.cmd.poll_type; - break; - case IPATH_CMD_ARMLAUNCH_CTRL: - if (cmd.cmd.armlaunch_ctrl) - ipath_enable_armlaunch(pd->port_dd); - else - ipath_disable_armlaunch(pd->port_dd); - break; - case IPATH_CMD_SDMA_INFLIGHT: - ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp), - (u32 __user *) (unsigned long) - cmd.cmd.sdma_inflight); - break; - case IPATH_CMD_SDMA_COMPLETE: - ret = ipath_sdma_get_complete(pd->port_dd, - user_sdma_queue_fp(fp), - (u32 __user *) (unsigned long) - cmd.cmd.sdma_complete); - break; - } - - if (ret >= 0) - ret = consumed; - -bail: - return ret; -} - -static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *filp = iocb->ki_filp; - struct ipath_filedata *fp = filp->private_data; - struct ipath_portdata *pd = port_fp(filp); - struct ipath_user_sdma_queue *pq = fp->pq; - - if (!iter_is_iovec(from) || !from->nr_segs) - return -EINVAL; - - return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs); -} - -static struct class *ipath_class; - -static int init_cdev(int minor, char *name, const struct file_operations *fops, - struct cdev **cdevp, struct device **devp) -{ - const dev_t dev = MKDEV(IPATH_MAJOR, minor); - struct cdev *cdev = NULL; - struct device *device = NULL; - int ret; - - cdev = cdev_alloc(); - if (!cdev) { - printk(KERN_ERR IPATH_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", - minor, name); - ret = -ENOMEM; - goto done; - } - - cdev->owner = THIS_MODULE; - cdev->ops = fops; - kobject_set_name(&cdev->kobj, name); - - ret = cdev_add(cdev, dev, 1); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", - minor, name, -ret); - goto err_cdev; - } - - device = device_create(ipath_class, NULL, dev, NULL, name); - - if (IS_ERR(device)) { - ret = PTR_ERR(device); - printk(KERN_ERR IPATH_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", - minor, name, -ret); - goto err_cdev; - } - - goto done; - -err_cdev: - cdev_del(cdev); - cdev = NULL; - -done: - if (ret >= 0) { - *cdevp = cdev; - *devp = device; - } else { - *cdevp = NULL; - *devp = NULL; - } - - return ret; -} - -int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, - struct cdev **cdevp, struct device **devp) -{ - return init_cdev(minor, name, fops, cdevp, devp); -} - -static void cleanup_cdev(struct cdev **cdevp, - struct device **devp) -{ - struct device *dev = *devp; - - if (dev) { - device_unregister(dev); - *devp = NULL; - } - - if (*cdevp) { - cdev_del(*cdevp); - *cdevp = NULL; - } -} - -void ipath_cdev_cleanup(struct cdev **cdevp, - struct device **devp) -{ - cleanup_cdev(cdevp, devp); -} - -static struct cdev *wildcard_cdev; -static struct device *wildcard_dev; - -static const dev_t dev = MKDEV(IPATH_MAJOR, 0); - -static int user_init(void) -{ - int ret; - - ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME); - if (ret < 0) { - printk(KERN_ERR IPATH_DRV_NAME ": Could not register " - "chrdev region (err %d)\n", -ret); - goto done; - } - - ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME); - - if (IS_ERR(ipath_class)) { - ret = PTR_ERR(ipath_class); - printk(KERN_ERR IPATH_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); - goto bail; - } - - goto done; -bail: - unregister_chrdev_region(dev, IPATH_NMINORS); -done: - return ret; -} - -static void user_cleanup(void) -{ - if (ipath_class) { - class_destroy(ipath_class); - ipath_class = NULL; - } - - unregister_chrdev_region(dev, IPATH_NMINORS); -} - -static atomic_t user_count = ATOMIC_INIT(0); -static atomic_t user_setup = ATOMIC_INIT(0); - -int ipath_user_add(struct ipath_devdata *dd) -{ - char name[10]; - int ret; - - if (atomic_inc_return(&user_count) == 1) { - ret = user_init(); - if (ret < 0) { - ipath_dev_err(dd, "Unable to set up user support: " - "error %d\n", -ret); - goto bail; - } - ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev, - &wildcard_dev); - if (ret < 0) { - ipath_dev_err(dd, "Could not create wildcard " - "minor: error %d\n", -ret); - goto bail_user; - } - - atomic_set(&user_setup, 1); - } - - snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit); - - ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops, - &dd->user_cdev, &dd->user_dev); - if (ret < 0) - ipath_dev_err(dd, "Could not create user minor %d, %s\n", - dd->ipath_unit + 1, name); - - goto bail; - -bail_user: - user_cleanup(); -bail: - return ret; -} - -void ipath_user_remove(struct ipath_devdata *dd) -{ - cleanup_cdev(&dd->user_cdev, &dd->user_dev); - - if (atomic_dec_return(&user_count) == 0) { - if (atomic_read(&user_setup) == 0) - goto bail; - - cleanup_cdev(&wildcard_cdev, &wildcard_dev); - user_cleanup(); - - atomic_set(&user_setup, 0); - } -bail: - return; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_fs.c b/kernel/drivers/infiniband/hw/ipath/ipath_fs.c deleted file mode 100644 index 1ca8e32a9..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_fs.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/mount.h> -#include <linux/pagemap.h> -#include <linux/init.h> -#include <linux/namei.h> -#include <linux/slab.h> - -#include "ipath_kernel.h" - -#define IPATHFS_MAGIC 0x726a77 - -static struct super_block *ipath_super; - -static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, - umode_t mode, const struct file_operations *fops, - void *data) -{ - int error; - struct inode *inode = new_inode(dir->i_sb); - - if (!inode) { - error = -EPERM; - goto bail; - } - - inode->i_ino = get_next_ino(); - inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_private = data; - if (S_ISDIR(mode)) { - inode->i_op = &simple_dir_inode_operations; - inc_nlink(inode); - inc_nlink(dir); - } - - inode->i_fop = fops; - - d_instantiate(dentry, inode); - error = 0; - -bail: - return error; -} - -static int create_file(const char *name, umode_t mode, - struct dentry *parent, struct dentry **dentry, - const struct file_operations *fops, void *data) -{ - int error; - - mutex_lock(&d_inode(parent)->i_mutex); - *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(*dentry)) - error = ipathfs_mknod(d_inode(parent), *dentry, - mode, fops, data); - else - error = PTR_ERR(*dentry); - mutex_unlock(&d_inode(parent)->i_mutex); - - return error; -} - -static ssize_t atomic_stats_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - return simple_read_from_buffer(buf, count, ppos, &ipath_stats, - sizeof ipath_stats); -} - -static const struct file_operations atomic_stats_ops = { - .read = atomic_stats_read, - .llseek = default_llseek, -}; - -static ssize_t atomic_counters_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct infinipath_counters counters; - struct ipath_devdata *dd; - - dd = file_inode(file)->i_private; - dd->ipath_f_read_counters(dd, &counters); - - return simple_read_from_buffer(buf, count, ppos, &counters, - sizeof counters); -} - -static const struct file_operations atomic_counters_ops = { - .read = atomic_counters_read, - .llseek = default_llseek, -}; - -static ssize_t flash_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct ipath_devdata *dd; - ssize_t ret; - loff_t pos; - char *tmp; - - pos = *ppos; - - if ( pos < 0) { - ret = -EINVAL; - goto bail; - } - - if (pos >= sizeof(struct ipath_flash)) { - ret = 0; - goto bail; - } - - if (count > sizeof(struct ipath_flash) - pos) - count = sizeof(struct ipath_flash) - pos; - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } - - dd = file_inode(file)->i_private; - if (ipath_eeprom_read(dd, pos, tmp, count)) { - ipath_dev_err(dd, "failed to read from flash\n"); - ret = -ENXIO; - goto bail_tmp; - } - - if (copy_to_user(buf, tmp, count)) { - ret = -EFAULT; - goto bail_tmp; - } - - *ppos = pos + count; - ret = count; - -bail_tmp: - kfree(tmp); - -bail: - return ret; -} - -static ssize_t flash_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ipath_devdata *dd; - ssize_t ret; - loff_t pos; - char *tmp; - - pos = *ppos; - - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct ipath_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } - - dd = file_inode(file)->i_private; - if (ipath_eeprom_write(dd, pos, tmp, count)) { - ret = -ENXIO; - ipath_dev_err(dd, "failed to write to flash\n"); - goto bail_tmp; - } - - *ppos = pos + count; - ret = count; - -bail_tmp: - kfree(tmp); - -bail: - return ret; -} - -static const struct file_operations flash_ops = { - .read = flash_read, - .write = flash_write, - .llseek = default_llseek, -}; - -static int create_device_files(struct super_block *sb, - struct ipath_devdata *dd) -{ - struct dentry *dir, *tmp; - char unit[10]; - int ret; - - snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); - ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, - &simple_dir_operations, dd); - if (ret) { - printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); - goto bail; - } - - ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp, - &atomic_counters_ops, dd); - if (ret) { - printk(KERN_ERR "create_file(%s/atomic_counters) " - "failed: %d\n", unit, ret); - goto bail; - } - - ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, - &flash_ops, dd); - if (ret) { - printk(KERN_ERR "create_file(%s/flash) " - "failed: %d\n", unit, ret); - goto bail; - } - -bail: - return ret; -} - -static int remove_file(struct dentry *parent, char *name) -{ - struct dentry *tmp; - int ret; - - tmp = lookup_one_len(name, parent, strlen(name)); - - if (IS_ERR(tmp)) { - ret = PTR_ERR(tmp); - goto bail; - } - - spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { - dget_dlock(tmp); - __d_drop(tmp); - spin_unlock(&tmp->d_lock); - simple_unlink(d_inode(parent), tmp); - } else - spin_unlock(&tmp->d_lock); - - ret = 0; -bail: - /* - * We don't expect clients to care about the return value, but - * it's there if they need it. - */ - return ret; -} - -static int remove_device_files(struct super_block *sb, - struct ipath_devdata *dd) -{ - struct dentry *dir, *root; - char unit[10]; - int ret; - - root = dget(sb->s_root); - mutex_lock(&d_inode(root)->i_mutex); - snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); - dir = lookup_one_len(unit, root, strlen(unit)); - - if (IS_ERR(dir)) { - ret = PTR_ERR(dir); - printk(KERN_ERR "Lookup of %s failed\n", unit); - goto bail; - } - - remove_file(dir, "flash"); - remove_file(dir, "atomic_counters"); - d_delete(dir); - ret = simple_rmdir(d_inode(root), dir); - -bail: - mutex_unlock(&d_inode(root)->i_mutex); - dput(root); - return ret; -} - -static int ipathfs_fill_super(struct super_block *sb, void *data, - int silent) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - int ret; - - static struct tree_descr files[] = { - [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO}, - {""}, - }; - - ret = simple_fill_super(sb, IPATHFS_MAGIC, files); - if (ret) { - printk(KERN_ERR "simple_fill_super failed: %d\n", ret); - goto bail; - } - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - ret = create_device_files(sb, dd); - if (ret) - goto bail; - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - -bail: - return ret; -} - -static struct dentry *ipathfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - struct dentry *ret; - ret = mount_single(fs_type, flags, data, ipathfs_fill_super); - if (!IS_ERR(ret)) - ipath_super = ret->d_sb; - return ret; -} - -static void ipathfs_kill_super(struct super_block *s) -{ - kill_litter_super(s); - ipath_super = NULL; -} - -int ipathfs_add_device(struct ipath_devdata *dd) -{ - int ret; - - if (ipath_super == NULL) { - ret = 0; - goto bail; - } - - ret = create_device_files(ipath_super, dd); - -bail: - return ret; -} - -int ipathfs_remove_device(struct ipath_devdata *dd) -{ - int ret; - - if (ipath_super == NULL) { - ret = 0; - goto bail; - } - - ret = remove_device_files(ipath_super, dd); - -bail: - return ret; -} - -static struct file_system_type ipathfs_fs_type = { - .owner = THIS_MODULE, - .name = "ipathfs", - .mount = ipathfs_mount, - .kill_sb = ipathfs_kill_super, -}; -MODULE_ALIAS_FS("ipathfs"); - -int __init ipath_init_ipathfs(void) -{ - return register_filesystem(&ipathfs_fs_type); -} - -void __exit ipath_exit_ipathfs(void) -{ - unregister_filesystem(&ipathfs_fs_type); -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_iba6110.c b/kernel/drivers/infiniband/hw/ipath/ipath_iba6110.c deleted file mode 100644 index 7cc305488..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ /dev/null @@ -1,1940 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file contains all of the code that is specific to the InfiniPath - * HT chip. - */ - -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/htirq.h> -#include <rdma/ib_verbs.h> - -#include "ipath_kernel.h" -#include "ipath_registers.h" - -static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); - - -/* - * This lists the InfiniPath registers, in the actual chip layout. - * This structure should never be directly accessed. - * - * The names are in InterCap form because they're taken straight from - * the chip specification. Since they're only used in this file, they - * don't pollute the rest of the source. -*/ - -struct _infinipath_do_not_use_kernel_regs { - unsigned long long Revision; - unsigned long long Control; - unsigned long long PageAlign; - unsigned long long PortCnt; - unsigned long long DebugPortSelect; - unsigned long long DebugPort; - unsigned long long SendRegBase; - unsigned long long UserRegBase; - unsigned long long CounterRegBase; - unsigned long long Scratch; - unsigned long long ReservedMisc1; - unsigned long long InterruptConfig; - unsigned long long IntBlocked; - unsigned long long IntMask; - unsigned long long IntStatus; - unsigned long long IntClear; - unsigned long long ErrorMask; - unsigned long long ErrorStatus; - unsigned long long ErrorClear; - unsigned long long HwErrMask; - unsigned long long HwErrStatus; - unsigned long long HwErrClear; - unsigned long long HwDiagCtrl; - unsigned long long MDIO; - unsigned long long IBCStatus; - unsigned long long IBCCtrl; - unsigned long long ExtStatus; - unsigned long long ExtCtrl; - unsigned long long GPIOOut; - unsigned long long GPIOMask; - unsigned long long GPIOStatus; - unsigned long long GPIOClear; - unsigned long long RcvCtrl; - unsigned long long RcvBTHQP; - unsigned long long RcvHdrSize; - unsigned long long RcvHdrCnt; - unsigned long long RcvHdrEntSize; - unsigned long long RcvTIDBase; - unsigned long long RcvTIDCnt; - unsigned long long RcvEgrBase; - unsigned long long RcvEgrCnt; - unsigned long long RcvBufBase; - unsigned long long RcvBufSize; - unsigned long long RxIntMemBase; - unsigned long long RxIntMemSize; - unsigned long long RcvPartitionKey; - unsigned long long ReservedRcv[10]; - unsigned long long SendCtrl; - unsigned long long SendPIOBufBase; - unsigned long long SendPIOSize; - unsigned long long SendPIOBufCnt; - unsigned long long SendPIOAvailAddr; - unsigned long long TxIntMemBase; - unsigned long long TxIntMemSize; - unsigned long long ReservedSend[9]; - unsigned long long SendBufferError; - unsigned long long SendBufferErrorCONT1; - unsigned long long SendBufferErrorCONT2; - unsigned long long SendBufferErrorCONT3; - unsigned long long ReservedSBE[4]; - unsigned long long RcvHdrAddr0; - unsigned long long RcvHdrAddr1; - unsigned long long RcvHdrAddr2; - unsigned long long RcvHdrAddr3; - unsigned long long RcvHdrAddr4; - unsigned long long RcvHdrAddr5; - unsigned long long RcvHdrAddr6; - unsigned long long RcvHdrAddr7; - unsigned long long RcvHdrAddr8; - unsigned long long ReservedRHA[7]; - unsigned long long RcvHdrTailAddr0; - unsigned long long RcvHdrTailAddr1; - unsigned long long RcvHdrTailAddr2; - unsigned long long RcvHdrTailAddr3; - unsigned long long RcvHdrTailAddr4; - unsigned long long RcvHdrTailAddr5; - unsigned long long RcvHdrTailAddr6; - unsigned long long RcvHdrTailAddr7; - unsigned long long RcvHdrTailAddr8; - unsigned long long ReservedRHTA[7]; - unsigned long long Sync; /* Software only */ - unsigned long long Dump; /* Software only */ - unsigned long long SimVer; /* Software only */ - unsigned long long ReservedSW[5]; - unsigned long long SerdesConfig0; - unsigned long long SerdesConfig1; - unsigned long long SerdesStatus; - unsigned long long XGXSConfig; - unsigned long long ReservedSW2[4]; -}; - -struct _infinipath_do_not_use_counters { - __u64 LBIntCnt; - __u64 LBFlowStallCnt; - __u64 Reserved1; - __u64 TxUnsupVLErrCnt; - __u64 TxDataPktCnt; - __u64 TxFlowPktCnt; - __u64 TxDwordCnt; - __u64 TxLenErrCnt; - __u64 TxMaxMinLenErrCnt; - __u64 TxUnderrunCnt; - __u64 TxFlowStallCnt; - __u64 TxDroppedPktCnt; - __u64 RxDroppedPktCnt; - __u64 RxDataPktCnt; - __u64 RxFlowPktCnt; - __u64 RxDwordCnt; - __u64 RxLenErrCnt; - __u64 RxMaxMinLenErrCnt; - __u64 RxICRCErrCnt; - __u64 RxVCRCErrCnt; - __u64 RxFlowCtrlErrCnt; - __u64 RxBadFormatCnt; - __u64 RxLinkProblemCnt; - __u64 RxEBPCnt; - __u64 RxLPCRCErrCnt; - __u64 RxBufOvflCnt; - __u64 RxTIDFullErrCnt; - __u64 RxTIDValidErrCnt; - __u64 RxPKeyMismatchCnt; - __u64 RxP0HdrEgrOvflCnt; - __u64 RxP1HdrEgrOvflCnt; - __u64 RxP2HdrEgrOvflCnt; - __u64 RxP3HdrEgrOvflCnt; - __u64 RxP4HdrEgrOvflCnt; - __u64 RxP5HdrEgrOvflCnt; - __u64 RxP6HdrEgrOvflCnt; - __u64 RxP7HdrEgrOvflCnt; - __u64 RxP8HdrEgrOvflCnt; - __u64 Reserved6; - __u64 Reserved7; - __u64 IBStatusChangeCnt; - __u64 IBLinkErrRecoveryCnt; - __u64 IBLinkDownedCnt; - __u64 IBSymbolErrCnt; -}; - -#define IPATH_KREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) -#define IPATH_CREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_counters, field) / sizeof(u64)) - -static const struct ipath_kregs ipath_ht_kregs = { - .kr_control = IPATH_KREG_OFFSET(Control), - .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), - .kr_debugport = IPATH_KREG_OFFSET(DebugPort), - .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), - .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), - .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), - .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), - .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), - .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), - .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), - .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), - .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), - .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), - .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), - .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), - .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), - .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), - .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), - .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), - .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), - .kr_intclear = IPATH_KREG_OFFSET(IntClear), - .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig), - .kr_intmask = IPATH_KREG_OFFSET(IntMask), - .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), - .kr_mdio = IPATH_KREG_OFFSET(MDIO), - .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), - .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), - .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), - .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), - .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase), - .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), - .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), - .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), - .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), - .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), - .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), - .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), - .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), - .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), - .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), - .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), - .kr_revision = IPATH_KREG_OFFSET(Revision), - .kr_scratch = IPATH_KREG_OFFSET(Scratch), - .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), - .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), - .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr), - .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase), - .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt), - .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize), - .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), - .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), - .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), - .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), - .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0), - .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1), - .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), - .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), - /* - * These should not be used directly via ipath_write_kreg64(), - * use them with ipath_write_kreg64_port(), - */ - .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), - .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) -}; - -static const struct ipath_cregs ipath_ht_cregs = { - .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), - .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), - .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), - .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), - .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), - .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), - .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), - .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), - .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), - .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), - .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), - .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), - /* calc from Reg_CounterRegBase + offset */ - .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), - .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), - .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), - .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), - .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), - .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), - .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), - .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), - .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), - .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), - .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), - .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), - .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), - .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), - .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), - .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), - .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), - .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), - .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), - .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), - .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt) -}; - -/* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1) -#define INFINIPATH_I_RCVURG_SHIFT 0 -#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1) -#define INFINIPATH_I_RCVAVAIL_SHIFT 12 - -/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ -#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 -#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL -#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL -#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL -#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL -#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL -#define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL -#define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL -#define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL -#define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL -#define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL -#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL -#define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL -#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL -#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL -#define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL -#define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL -#define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL -#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL -#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL - -#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf -#define IBA6110_IBCS_LINKSTATE_SHIFT 4 - -/* kr_extstatus bits */ -#define INFINIPATH_EXTS_FREQSEL 0x2 -#define INFINIPATH_EXTS_SERDESSEL 0x4 -#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 -#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 - - -/* TID entries (memory), HT-only */ -#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ -#define INFINIPATH_RT_VALID 0x8000000000000000ULL -#define INFINIPATH_RT_ADDR_SHIFT 0 -#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL -#define INFINIPATH_RT_BUFSIZE_SHIFT 48 - -#define INFINIPATH_R_INTRAVAIL_SHIFT 16 -#define INFINIPATH_R_TAILUPD_SHIFT 31 - -/* kr_xgxsconfig bits */ -#define INFINIPATH_XGXS_RESET 0x7ULL - -/* - * masks and bits that are different in different chips, or present only - * in one - */ -static const ipath_err_t infinipath_hwe_htcmemparityerr_mask = - INFINIPATH_HWE_HTCMEMPARITYERR_MASK; -static const ipath_err_t infinipath_hwe_htcmemparityerr_shift = - INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT; - -static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr = - INFINIPATH_HWE_HTCLNKABYTE0CRCERR; -static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr = - INFINIPATH_HWE_HTCLNKABYTE1CRCERR; -static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr = - INFINIPATH_HWE_HTCLNKBBYTE0CRCERR; -static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = - INFINIPATH_HWE_HTCLNKBBYTE1CRCERR; - -#define _IPATH_GPIO_SDA_NUM 1 -#define _IPATH_GPIO_SCL_NUM 0 - -#define IPATH_GPIO_SDA \ - (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -#define IPATH_GPIO_SCL \ - (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) - -/* keep the code below somewhat more readable; not used elsewhere */ -#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ - infinipath_hwe_htclnkabyte1crcerr) -#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ - infinipath_hwe_htclnkbbyte1crcerr) -#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ - infinipath_hwe_htclnkbbyte0crcerr) -#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \ - infinipath_hwe_htclnkbbyte1crcerr) - -static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, - char *msg, size_t msgl) -{ - char bitsmsg[64]; - ipath_err_t crcbits = hwerrs & - (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS); - /* don't check if 8bit HT */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT0) - crcbits &= ~infinipath_hwe_htclnkabyte1crcerr; - /* don't check if 8bit HT */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT1) - crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr; - /* - * we'll want to ignore link errors on link that is - * not in use, if any. For now, complain about both - */ - if (crcbits) { - u16 ctrl0, ctrl1; - snprintf(bitsmsg, sizeof bitsmsg, - "[HT%s lane %s CRC (%llx); powercycle to completely clear]", - !(crcbits & _IPATH_HTLINK1_CRCBITS) ? - "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) - ? "1 (B)" : "0+1 (A+B)"), - !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0" - : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" : - "0+1"), (unsigned long long) crcbits); - strlcat(msg, bitsmsg, msgl); - - /* - * print extra info for debugging. slave/primary - * config word 4, 8 (link control 0, 1) - */ - - if (pci_read_config_word(dd->pcidev, - dd->ipath_ht_slave_off + 0x4, - &ctrl0)) - dev_info(&dd->pcidev->dev, "Couldn't read " - "linkctrl0 of slave/primary " - "config block\n"); - else if (!(ctrl0 & 1 << 6)) - /* not if EOC bit set */ - ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0, - ((ctrl0 >> 8) & 7) ? " CRC" : "", - ((ctrl0 >> 4) & 1) ? "linkfail" : - ""); - if (pci_read_config_word(dd->pcidev, - dd->ipath_ht_slave_off + 0x8, - &ctrl1)) - dev_info(&dd->pcidev->dev, "Couldn't read " - "linkctrl1 of slave/primary " - "config block\n"); - else if (!(ctrl1 & 1 << 6)) - /* not if EOC bit set */ - ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1, - ((ctrl1 >> 8) & 7) ? " CRC" : "", - ((ctrl1 >> 4) & 1) ? "linkfail" : - ""); - - /* disable until driver reloaded */ - dd->ipath_hwerrmask &= ~crcbits; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - ipath_dbg("HT crc errs: %s\n", msg); - } else - ipath_dbg("ignoring HT crc errors 0x%llx, " - "not in use\n", (unsigned long long) - (hwerrs & (_IPATH_HTLINK0_CRCBITS | - _IPATH_HTLINK1_CRCBITS))); -} - -/* 6110 specific hardware errors... */ -static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { - INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"), - INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"), - INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"), - INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"), - INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"), - INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"), - INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), - INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), -}; - -#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) -#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) - -static void ipath_ht_txe_recover(struct ipath_devdata *dd) -{ - ++ipath_stats.sps_txeparity; - dev_info(&dd->pcidev->dev, - "Recovering from TXE PIO parity error\n"); -} - - -/** - * ipath_ht_handle_hwerrors - display hardware errors. - * @dd: the infinipath device - * @msg: the output buffer - * @msgl: the size of the output buffer - * - * Use same msg buffer as regular errors to avoid excessive stack - * use. Most hardware errors are catastrophic, but for right now, - * we'll print them and continue. We reuse the same message buffer as - * ipath_handle_errors() to avoid excessive stack usage. - */ -static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, - size_t msgl) -{ - ipath_err_t hwerrs; - u32 bits, ctrl; - int isfatal = 0; - char bitsmsg[64]; - int log_idx; - - hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - - if (!hwerrs) { - ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); - /* - * better than printing cofusing messages - * This seems to be related to clearing the crc error, or - * the pll error during init. - */ - goto bail; - } else if (hwerrs == -1LL) { - ipath_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); - goto bail; - } - ipath_stats.sps_hwerrs++; - - /* Always clear the error status register, except MEMBISTFAIL, - * regardless of whether we continue or stop using the chip. - * We want that set so we know it failed, even across driver reload. - * We'll still ignore it in the hwerrmask. We do this partly for - * diagnostics, but also for support */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); - - hwerrs &= dd->ipath_hwerrmask; - - /* We log some errors to EEPROM, check if we have any of those. */ - for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) - if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) - ipath_inc_eeprom_err(dd, log_idx, 1); - - /* - * make sure we get this much out, unless told to be quiet, - * it's a parity error we may recover from, - * or it's occurred within the last 5 seconds - */ - if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | - RXE_EAGER_PARITY)) || - (ipath_debug & __IPATH_VERBDBG)) - dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); - dd->ipath_lasthwerror |= hwerrs; - - if (hwerrs & ~dd->ipath_hwe_bitsextant) - ipath_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~dd->ipath_hwe_bitsextant)); - - ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { - /* - * parity errors in send memory are recoverable, - * just cancel the send (if indicated in * sendbuffererror), - * count the occurrence, unfreeze (if no other handled - * hardware error bits are set), and continue. They can - * occur if a processor speculative read is done to the PIO - * buffer while we are sending a packet, for example. - */ - if (hwerrs & TXE_PIO_PARITY) { - ipath_ht_txe_recover(dd); - hwerrs &= ~TXE_PIO_PARITY; - } - - if (!hwerrs) { - ipath_dbg("Clearing freezemode on ignored or " - "recovered hardware error\n"); - ipath_clear_freeze(dd); - } - } - - *msg = '\0'; - - /* - * may someday want to decode into which bits are which - * functional area for parity errors, etc. - */ - if (hwerrs & (infinipath_hwe_htcmemparityerr_mask - << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_HTCMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ", - bits); - strlcat(msg, bitsmsg, msgl); - } - - ipath_format_hwerrors(hwerrs, - ipath_6110_hwerror_msgs, - ARRAY_SIZE(ipath_6110_hwerror_msgs), - msg, msgl); - - if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) - hwerr_crcbits(dd, hwerrs, msg, msgl); - - if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", - msgl); - /* ignore from now on, so disable until driver reloaded */ - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } -#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ - INFINIPATH_HWE_COREPLL_RFSLIP | \ - INFINIPATH_HWE_HTBPLL_FBSLIP | \ - INFINIPATH_HWE_HTBPLL_RFSLIP | \ - INFINIPATH_HWE_HTAPLL_FBSLIP | \ - INFINIPATH_HWE_HTAPLL_RFSLIP) - - if (hwerrs & _IPATH_PLL_FAIL) { - snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), InfiniPath hardware unusable]", - (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); - strlcat(msg, bitsmsg, msgl); - /* ignore from now on, so disable until driver reloaded */ - dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { - /* - * If it occurs, it is left masked since the eternal - * interface is unused - */ - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs) { - /* - * if any set that we aren't ignoring; only - * make the complaint once, in case it's stuck - * or recurring, and we get here multiple - * times. - * force link down, so switch knows, and - * LEDs are turned off - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - ipath_setup_ht_setextled(dd, - INFINIPATH_IBCS_L_STATE_DOWN, - INFINIPATH_IBCS_LT_STATE_DISABLED); - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - /* - * mark as not usable, at a minimum until driver - * is reloaded, probably until reboot, since no - * other reset is possible. - */ - dd->ipath_flags &= ~IPATH_INITTED; - } - else - *msg = 0; /* recovered from all of them */ - if (*msg) - ipath_dev_err(dd, "%s hardware error\n", msg); - if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) - /* - * for status file; if no trailing brace is copied, - * we'll know it was truncated. - */ - snprintf(dd->ipath_freezemsg, - dd->ipath_freezelen, "{%s}", msg); - -bail:; -} - -/** - * ipath_ht_boardname - fill in the board name - * @dd: the infinipath device - * @name: the output buffer - * @namelen: the size of the output buffer - * - * fill in the board name, based on the board revision register - */ -static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, - size_t namelen) -{ - char *n = NULL; - u8 boardrev = dd->ipath_boardrev; - int ret = 0; - - switch (boardrev) { - case 5: - /* - * original production board; two production levels, with - * different serial number ranges. See ipath_ht_early_init() for - * case where we enable IPATH_GPIO_INTR for later serial # range. - * Original 112* serial number is no longer supported. - */ - n = "InfiniPath_QHT7040"; - break; - case 7: - /* small form factor production board */ - n = "InfiniPath_QHT7140"; - break; - default: /* don't know, just print the number */ - ipath_dev_err(dd, "Don't yet know about board " - "with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", - boardrev); - break; - } - if (n) - snprintf(name, namelen, "%s", n); - - if (ret) { - ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name); - goto bail; - } - if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || - dd->ipath_minrev > 4)) { - /* - * This version of the driver only supports Rev 3.2 - 3.4 - */ - ipath_dev_err(dd, - "Unsupported InfiniPath hardware revision %u.%u!\n", - dd->ipath_majrev, dd->ipath_minrev); - ret = 1; - goto bail; - } - /* - * pkt/word counters are 32 bit, and therefore wrap fast enough - * that we snapshot them from a timer, and maintain 64 bit shadow - * copies - */ - dd->ipath_flags |= IPATH_32BITCOUNTERS; - dd->ipath_flags |= IPATH_GPIO_INTR; - if (dd->ipath_lbus_speed != 800) - ipath_dev_err(dd, - "Incorrectly configured for HT @ %uMHz\n", - dd->ipath_lbus_speed); - - /* - * set here, not in ipath_init_*_funcs because we have to do - * it after we can read chip registers. - */ - dd->ipath_ureg_align = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); - -bail: - return ret; -} - -static void ipath_check_htlink(struct ipath_devdata *dd) -{ - u8 linkerr, link_off, i; - - for (i = 0; i < 2; i++) { - link_off = dd->ipath_ht_slave_off + i * 4 + 0xd; - if (pci_read_config_byte(dd->pcidev, link_off, &linkerr)) - dev_info(&dd->pcidev->dev, "Couldn't read " - "linkerror%d of HT slave/primary block\n", - i); - else if (linkerr & 0xf0) { - ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " - "clearing\n", linkerr >> 4, i); - /* - * writing the linkerr bits that are set should - * clear them - */ - if (pci_write_config_byte(dd->pcidev, link_off, - linkerr)) - ipath_dbg("Failed write to clear HT " - "linkerror%d\n", i); - if (pci_read_config_byte(dd->pcidev, link_off, - &linkerr)) - dev_info(&dd->pcidev->dev, - "Couldn't reread linkerror%d of " - "HT slave/primary block\n", i); - else if (linkerr & 0xf0) - dev_info(&dd->pcidev->dev, - "HT linkerror%d bits 0x%x " - "couldn't be cleared\n", - i, linkerr >> 4); - } - } -} - -static int ipath_setup_ht_reset(struct ipath_devdata *dd) -{ - ipath_dbg("No reset possible for this InfiniPath hardware\n"); - return 0; -} - -#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ -#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ - -/* - * Bits 13-15 of command==0 is slave/primary block. Clear any HT CRC - * errors. We only bother to do this at load time, because it's OK if - * it happened before we were loaded (first time after boot/reset), - * but any time after that, it's fatal anyway. Also need to not check - * for upper byte errors if we are in 8 bit mode, so figure out - * our width. For now, at least, also complain if it's 8 bit. - */ -static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, - int pos, u8 cap_type) -{ - u8 linkwidth = 0, linkerr, link_a_b_off, link_off; - u16 linkctrl = 0; - int i; - - dd->ipath_ht_slave_off = pos; - /* command word, master_host bit */ - /* master host || slave */ - if ((cap_type >> 2) & 1) - link_a_b_off = 4; - else - link_a_b_off = 0; - ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n", - link_a_b_off ? 1 : 0, - link_a_b_off ? 'B' : 'A'); - - link_a_b_off += pos; - - /* - * check both link control registers; clear both HT CRC sets if - * necessary. - */ - for (i = 0; i < 2; i++) { - link_off = pos + i * 4 + 0x4; - if (pci_read_config_word(pdev, link_off, &linkctrl)) - ipath_dev_err(dd, "Couldn't read HT link control%d " - "register\n", i); - else if (linkctrl & (0xf << 8)) { - ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error " - "bits %x\n", i, linkctrl & (0xf << 8)); - /* - * now write them back to clear the error. - */ - pci_write_config_word(pdev, link_off, - linkctrl & (0xf << 8)); - } - } - - /* - * As with HT CRC bits, same for protocol errors that might occur - * during boot. - */ - for (i = 0; i < 2; i++) { - link_off = pos + i * 4 + 0xd; - if (pci_read_config_byte(pdev, link_off, &linkerr)) - dev_info(&pdev->dev, "Couldn't read linkerror%d " - "of HT slave/primary block\n", i); - else if (linkerr & 0xf0) { - ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " - "clearing\n", linkerr >> 4, i); - /* - * writing the linkerr bits that are set will clear - * them - */ - if (pci_write_config_byte - (pdev, link_off, linkerr)) - ipath_dbg("Failed write to clear HT " - "linkerror%d\n", i); - if (pci_read_config_byte(pdev, link_off, &linkerr)) - dev_info(&pdev->dev, "Couldn't reread " - "linkerror%d of HT slave/primary " - "block\n", i); - else if (linkerr & 0xf0) - dev_info(&pdev->dev, "HT linkerror%d bits " - "0x%x couldn't be cleared\n", - i, linkerr >> 4); - } - } - - /* - * this is just for our link to the host, not devices connected - * through tunnel. - */ - - if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth)) - ipath_dev_err(dd, "Couldn't read HT link width " - "config register\n"); - else { - u32 width; - switch (linkwidth & 7) { - case 5: - width = 4; - break; - case 4: - width = 2; - break; - case 3: - width = 32; - break; - case 1: - width = 16; - break; - case 0: - default: /* if wrong, assume 8 bit */ - width = 8; - break; - } - - dd->ipath_lbus_width = width; - - if (linkwidth != 0x11) { - ipath_dev_err(dd, "Not configured for 16 bit HT " - "(%x)\n", linkwidth); - if (!(linkwidth & 0xf)) { - ipath_dbg("Will ignore HT lane1 errors\n"); - dd->ipath_flags |= IPATH_8BIT_IN_HT0; - } - } - } - - /* - * this is just for our link to the host, not devices connected - * through tunnel. - */ - if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth)) - ipath_dev_err(dd, "Couldn't read HT link frequency " - "config register\n"); - else { - u32 speed; - switch (linkwidth & 0xf) { - case 6: - speed = 1000; - break; - case 5: - speed = 800; - break; - case 4: - speed = 600; - break; - case 3: - speed = 500; - break; - case 2: - speed = 400; - break; - case 1: - speed = 300; - break; - default: - /* - * assume reserved and vendor-specific are 200... - */ - case 0: - speed = 200; - break; - } - dd->ipath_lbus_speed = speed; - } - - snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info), - "HyperTransport,%uMHz,x%u\n", - dd->ipath_lbus_speed, - dd->ipath_lbus_width); -} - -static int ipath_ht_intconfig(struct ipath_devdata *dd) -{ - int ret; - - if (dd->ipath_intconfig) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, - dd->ipath_intconfig); /* interrupt address */ - ret = 0; - } else { - ipath_dev_err(dd, "No interrupts enabled, couldn't setup " - "interrupt address\n"); - ret = -EINVAL; - } - - return ret; -} - -static void ipath_ht_irq_update(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg) -{ - struct ipath_devdata *dd = pci_get_drvdata(dev); - u64 prev_intconfig = dd->ipath_intconfig; - - dd->ipath_intconfig = msg->address_lo; - dd->ipath_intconfig |= ((u64) msg->address_hi) << 32; - - /* - * If the previous value of dd->ipath_intconfig is zero, we're - * getting configured for the first time, and must not program the - * intconfig register here (it will be programmed later, when the - * hardware is ready). Otherwise, we should. - */ - if (prev_intconfig) - ipath_ht_intconfig(dd); -} - -/** - * ipath_setup_ht_config - setup the interruptconfig register - * @dd: the infinipath device - * @pdev: the PCI device - * - * setup the interruptconfig register from the HT config info. - * Also clear CRC errors in HT linkcontrol, if necessary. - * This is done only for the real hardware. It is done before - * chip address space is initted, so can't touch infinipath registers - */ -static int ipath_setup_ht_config(struct ipath_devdata *dd, - struct pci_dev *pdev) -{ - int pos, ret; - - ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update); - if (ret < 0) { - ipath_dev_err(dd, "Couldn't create interrupt handler: " - "err %d\n", ret); - goto bail; - } - dd->ipath_irq = ret; - ret = 0; - - /* - * Handle clearing CRC errors in linkctrl register if necessary. We - * do this early, before we ever enable errors or hardware errors, - * mostly to avoid causing the chip to enter freeze mode. - */ - pos = pci_find_capability(pdev, PCI_CAP_ID_HT); - if (!pos) { - ipath_dev_err(dd, "Couldn't find HyperTransport " - "capability; no interrupts\n"); - ret = -ENODEV; - goto bail; - } - do { - u8 cap_type; - - /* - * The HT capability type byte is 3 bytes after the - * capability byte. - */ - if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { - dev_info(&pdev->dev, "Couldn't read config " - "command @ %d\n", pos); - continue; - } - if (!(cap_type & 0xE0)) - slave_or_pri_blk(dd, pdev, pos, cap_type); - } while ((pos = pci_find_next_capability(pdev, pos, - PCI_CAP_ID_HT))); - - dd->ipath_flags |= IPATH_SWAP_PIOBUFS; - -bail: - return ret; -} - -/** - * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff - * @dd: the infinipath device - * - * Called during driver unload. - * This is currently a nop for the HT chip, not for all chips - */ -static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) -{ -} - -/** - * ipath_setup_ht_setextled - set the state of the two external LEDs - * @dd: the infinipath device - * @lst: the L state - * @ltst: the LT state - * - * Set the state of the two external LEDs, to indicate physical and - * logical state of IB link. For this chip (at least with recommended - * board pinouts), LED1 is Green (physical state), and LED2 is Yellow - * (logical state) - * - * Note: We try to match the Mellanox HCA LED behavior as best - * we can. Green indicates physical link state is OK (something is - * plugged in, and we can train). - * Amber indicates the link is logically up (ACTIVE). - * Mellanox further blinks the amber LED to indicate data packet - * activity, but we have no hardware support for that, so it would - * require waking up every 10-20 msecs and checking the counters - * on the chip, and then turning the LED off if appropriate. That's - * visible overhead, so not something we will do. - * - */ -static void ipath_setup_ht_setextled(struct ipath_devdata *dd, - u64 lst, u64 ltst) -{ - u64 extctl; - unsigned long flags = 0; - - /* the diags use the LED to indicate diag info, so we leave - * the external LED alone when the diags are running */ - if (ipath_diag_inuse) - return; - - /* Allow override of LED display for, e.g. Locating system in rack */ - if (dd->ipath_led_override) { - ltst = (dd->ipath_led_override & IPATH_LED_PHYS) - ? INFINIPATH_IBCS_LT_STATE_LINKUP - : INFINIPATH_IBCS_LT_STATE_DISABLED; - lst = (dd->ipath_led_override & IPATH_LED_LOG) - ? INFINIPATH_IBCS_L_STATE_ACTIVE - : INFINIPATH_IBCS_L_STATE_DOWN; - } - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - /* - * start by setting both LED control bits to off, then turn - * on the appropriate bit(s). - */ - if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */ - /* - * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF - * is inverted, because it is normally used to indicate - * a hardware fault at reset, if there were errors - */ - extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON) - | INFINIPATH_EXTC_LEDGBLERR_OFF; - if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) - extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF; - if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) - extctl |= INFINIPATH_EXTC_LEDGBLOK_ON; - } - else { - extctl = dd->ipath_extctrl & - ~(INFINIPATH_EXTC_LED1PRIPORT_ON | - INFINIPATH_EXTC_LED2PRIPORT_ON); - if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) - extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; - if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) - extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON; - } - dd->ipath_extctrl = extctl; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); -} - -static void ipath_init_ht_variables(struct ipath_devdata *dd) -{ - /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_ht_kregs; - dd->ipath_cregs = &ipath_ht_cregs; - - dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - dd->ipath_gpio_sda = IPATH_GPIO_SDA; - dd->ipath_gpio_scl = IPATH_GPIO_SCL; - - /* - * Fill in data for field-values that change in newer chips. - * We dynamically specify only the mask for LINKTRAININGSTATE - * and only the shift for LINKSTATE, as they are the only ones - * that change. Also precalculate the 3 link states of interest - * and the combined mask. - */ - dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT; - dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK; - dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << - dd->ibcs_ls_shift) | dd->ibcs_lts_mask; - dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); - dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); - dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); - - /* - * Fill in data for ibcc field-values that change in newer chips. - * We dynamically specify only the mask for LINKINITCMD - * and only the shift for LINKCMD and MAXPKTLEN, as they are - * the only ones that change. - */ - dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK; - dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT; - dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT; - - /* Fill in shifts for RcvCtrl. */ - dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; - dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT; - dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT; - dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */ - - dd->ipath_i_bitsextant = - (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | - (INFINIPATH_I_RCVAVAIL_MASK << - INFINIPATH_I_RCVAVAIL_SHIFT) | - INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | - INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; - - dd->ipath_e_bitsextant = - INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | - INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | - INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | - INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR | - INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP | - INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION | - INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN | - INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK | - INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN | - INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN | - INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT | - INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | - INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED | - INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | - INFINIPATH_E_HARDWARE; - - dd->ipath_hwe_bitsextant = - (INFINIPATH_HWE_HTCMEMPARITYERR_MASK << - INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | - (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | - (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | - INFINIPATH_HWE_HTCLNKABYTE0CRCERR | - INFINIPATH_HWE_HTCLNKABYTE1CRCERR | - INFINIPATH_HWE_HTCLNKBBYTE0CRCERR | - INFINIPATH_HWE_HTCLNKBBYTE1CRCERR | - INFINIPATH_HWE_HTCMISCERR4 | - INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 | - INFINIPATH_HWE_HTCMISCERR7 | - INFINIPATH_HWE_HTCBUSTREQPARITYERR | - INFINIPATH_HWE_HTCBUSTRESPPARITYERR | - INFINIPATH_HWE_HTCBUSIREQPARITYERR | - INFINIPATH_HWE_RXDSYNCMEMPARITYERR | - INFINIPATH_HWE_MEMBISTFAILED | - INFINIPATH_HWE_COREPLL_FBSLIP | - INFINIPATH_HWE_COREPLL_RFSLIP | - INFINIPATH_HWE_HTBPLL_FBSLIP | - INFINIPATH_HWE_HTBPLL_RFSLIP | - INFINIPATH_HWE_HTAPLL_FBSLIP | - INFINIPATH_HWE_HTAPLL_RFSLIP | - INFINIPATH_HWE_SERDESPLLFAILED | - INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | - INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; - - dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; - dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; - dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; - - /* - * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. - * 2 is Some Misc, 3 is reserved for future. - */ - dd->ipath_eep_st_masks[0].hwerrs_to_log = - INFINIPATH_HWE_TXEMEMPARITYERR_MASK << - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; - - dd->ipath_eep_st_masks[1].hwerrs_to_log = - INFINIPATH_HWE_RXEMEMPARITYERR_MASK << - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; - - dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET; - - dd->delay_mult = 2; /* SDR, 4X, can't change */ - - dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; - dd->ipath_link_speed_supported = IPATH_IB_SDR; - dd->ipath_link_width_enabled = IB_WIDTH_4X; - dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; - /* these can't change for this chip, so set once */ - dd->ipath_link_width_active = dd->ipath_link_width_enabled; - dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; -} - -/** - * ipath_ht_init_hwerrors - enable hardware errors - * @dd: the infinipath device - * - * now that we have finished initializing everything that might reasonably - * cause a hardware error, and cleared those errors bits as they occur, - * we can enable hardware errors in the mask (potentially enabling - * freeze mode), and enable hardware errors as errors (along with - * everything else) in errormask - */ -static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) -{ - ipath_err_t val; - u64 extsval; - - extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - - if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) - ipath_dev_err(dd, "MemBIST did not complete!\n"); - if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) - ipath_dbg("MemBIST corrected\n"); - - ipath_check_htlink(dd); - - /* barring bugs, all hwerrors become interrupts, which can */ - val = -1LL; - /* don't look at crc lane1 if 8 bit */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT0) - val &= ~infinipath_hwe_htclnkabyte1crcerr; - /* don't look at crc lane1 if 8 bit */ - if (dd->ipath_flags & IPATH_8BIT_IN_HT1) - val &= ~infinipath_hwe_htclnkbbyte1crcerr; - - /* - * disable RXDSYNCMEMPARITY because external serdes is unused, - * and therefore the logic will never be used or initialized, - * and uninitialized state will normally result in this error - * being asserted. Similarly for the external serdess pll - * lock signal. - */ - val &= ~(INFINIPATH_HWE_SERDESPLLFAILED | - INFINIPATH_HWE_RXDSYNCMEMPARITYERR); - - /* - * Disable MISCERR4 because of an inversion in the HT core - * logic checking for errors that cause this bit to be set. - * The errata can also cause the protocol error bit to be set - * in the HT config space linkerror register(s). - */ - val &= ~INFINIPATH_HWE_HTCMISCERR4; - - /* - * PLL ignored because unused MDIO interface has a logic problem - */ - if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) - val &= ~INFINIPATH_HWE_SERDESPLLFAILED; - dd->ipath_hwerrmask = val; -} - - - - -/** - * ipath_ht_bringup_serdes - bring up the serdes - * @dd: the infinipath device - */ -static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) -{ - u64 val, config1; - int ret = 0, change = 0; - - ipath_dbg("Trying to bringup serdes\n"); - - if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & - INFINIPATH_HWE_SERDESPLLFAILED) - { - ipath_dbg("At start, serdes PLL failed bit set in " - "hwerrstatus, clearing and continuing\n"); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - INFINIPATH_HWE_SERDESPLLFAILED); - } - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); - - ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx " - "config1=%llx, sstatus=%llx xgxs %llx\n", - (unsigned long long) val, (unsigned long long) config1, - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - - /* force reset on */ - val |= INFINIPATH_SERDC0_RESET_PLL - /* | INFINIPATH_SERDC0_RESET_MASK */ - ; - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); - udelay(15); /* need pll reset set at least for a bit */ - - if (val & INFINIPATH_SERDC0_RESET_PLL) { - u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL; - /* set lane resets, and tx idle, during pll reset */ - val2 |= INFINIPATH_SERDC0_RESET_MASK | - INFINIPATH_SERDC0_TXIDLE; - ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing " - "%llx)\n", (unsigned long long) val2); - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, - val2); - /* - * be sure chip saw it - */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - /* - * need pll reset clear at least 11 usec before lane - * resets cleared; give it a few more - */ - udelay(15); - val = val2; /* for check below */ - } - - if (val & (INFINIPATH_SERDC0_RESET_PLL | - INFINIPATH_SERDC0_RESET_MASK | - INFINIPATH_SERDC0_TXIDLE)) { - val &= ~(INFINIPATH_SERDC0_RESET_PLL | - INFINIPATH_SERDC0_RESET_MASK | - INFINIPATH_SERDC0_TXIDLE); - /* clear them */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, - val); - } - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - if (val & INFINIPATH_XGXS_RESET) { - /* normally true after boot */ - val &= ~INFINIPATH_XGXS_RESET; - change = 1; - } - if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & - INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { - /* need to compensate for Tx inversion in partner */ - val &= ~(INFINIPATH_XGXS_RX_POL_MASK << - INFINIPATH_XGXS_RX_POL_SHIFT); - val |= dd->ipath_rx_pol_inv << - INFINIPATH_XGXS_RX_POL_SHIFT; - change = 1; - } - if (change) - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - - /* clear current and de-emphasis bits */ - config1 &= ~0x0ffffffff00ULL; - /* set current to 20ma */ - config1 |= 0x00000000000ULL; - /* set de-emphasis to -5.68dB */ - config1 |= 0x0cccc000000ULL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1); - - ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx " - "config1=%llx, sstatus=%llx xgxs %llx\n", - (unsigned long long) val, (unsigned long long) config1, - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - - return ret; /* for now, say we always succeeded */ -} - -/** - * ipath_ht_quiet_serdes - set serdes to txidle - * @dd: the infinipath device - * driver is being unloaded - */ -static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) -{ - u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - - val |= INFINIPATH_SERDC0_TXIDLE; - ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", - (unsigned long long) val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); -} - -/** - * ipath_pe_put_tid - write a TID in chip - * @dd: the infinipath device - * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected - * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing - * - * This exists as a separate routine to allow for special locking etc. - * It's used for both the full cleanup on exit, as well as the normal - * setup and teardown. - */ -static void ipath_ht_put_tid(struct ipath_devdata *dd, - u64 __iomem *tidptr, u32 type, - unsigned long pa) -{ - if (!dd->ipath_kregbase) - return; - - if (pa != dd->ipath_tidinvalid) { - if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { - dev_info(&dd->pcidev->dev, - "physaddr %lx has more than " - "40 bits, using only 40!!!\n", pa); - pa &= INFINIPATH_RT_ADDR_MASK; - } - if (type == RCVHQ_RCV_TYPE_EAGER) - pa |= dd->ipath_tidtemplate; - else { - /* in words (fixed, full page). */ - u64 lenvalid = PAGE_SIZE >> 2; - lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT; - pa |= lenvalid | INFINIPATH_RT_VALID; - } - } - - writeq(pa, tidptr); -} - - -/** - * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager - * @dd: the infinipath device - * @port: the port - * - * Used from ipath_close(), and at chip initialization. - */ -static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) -{ - u64 __iomem *tidbase; - int i; - - if (!dd->ipath_kregbase) - return; - - ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port); - - /* - * need to invalidate all of the expected TID entries for this - * port, so we don't have valid entries that might somehow get - * used (early in next use of this port, or through some bug) - */ - tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - port * dd->ipath_rcvtidcnt * - sizeof(*tidbase)); - for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, - dd->ipath_tidinvalid); - - tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvegrbase + - port * dd->ipath_rcvegrcnt * - sizeof(*tidbase)); - - for (i = 0; i < dd->ipath_rcvegrcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, - dd->ipath_tidinvalid); -} - -/** - * ipath_ht_tidtemplate - setup constants for TID updates - * @dd: the infinipath device - * - * We setup stuff that we use a lot, to avoid calculating each time - */ -static void ipath_ht_tidtemplate(struct ipath_devdata *dd) -{ - dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2; - dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT; - dd->ipath_tidtemplate |= INFINIPATH_RT_VALID; - - /* - * work around chip errata bug 7358, by marking invalid tids - * as having max length - */ - dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) << - INFINIPATH_RT_BUFSIZE_SHIFT; -} - -static int ipath_ht_early_init(struct ipath_devdata *dd) -{ - u32 __iomem *piobuf; - u32 pioincr, val32; - int i; - - /* - * one cache line; long IB headers will spill over into received - * buffer - */ - dd->ipath_rcvhdrentsize = 16; - dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - - /* - * For HT, we allocate a somewhat overly large eager buffer, - * such that we can guarantee that we can receive the largest - * packet that we can send out. To truly support a 4KB MTU, - * we need to bump this to a large value. To date, other than - * testing, we have never encountered an HCA that can really - * send 4KB MTU packets, so we do not handle that (we'll get - * errors interrupts if we ever see one). - */ - dd->ipath_rcvegrbufsize = dd->ipath_piosize2k; - - /* - * the min() check here is currently a nop, but it may not - * always be, depending on just how we do ipath_rcvegrbufsize - */ - dd->ipath_ibmaxlen = min(dd->ipath_piosize2k, - dd->ipath_rcvegrbufsize); - dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; - ipath_ht_tidtemplate(dd); - - /* - * zero all the TID entries at startup. We do this for sanity, - * in case of a previous driver crash of some kind, and also - * because the chip powers up with these memories in an unknown - * state. Use portcnt, not cfgports, since this is for the - * full chip, not for current (possibly different) configuration - * value. - * Chip Errata bug 6447 - */ - for (val32 = 0; val32 < dd->ipath_portcnt; val32++) - ipath_ht_clear_tids(dd, val32); - - /* - * write the pbc of each buffer, to be sure it's initialized, then - * cancel all the buffers, and also abort any packets that might - * have been in flight for some reason (the latter is for driver - * unload/reload, but isn't a bad idea at first init). PIO send - * isn't enabled at this point, so there is no danger of sending - * these out on the wire. - * Chip Errata bug 6610 - */ - piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) + - dd->ipath_piobufbase); - pioincr = dd->ipath_palign / sizeof(*piobuf); - for (i = 0; i < dd->ipath_piobcnt2k; i++) { - /* - * reasonable word count, just to init pbc - */ - writel(16, piobuf); - piobuf += pioincr; - } - - ipath_get_eeprom_info(dd); - if (dd->ipath_boardrev == 5) { - /* - * Later production QHT7040 has same changes as QHT7140, so - * can use GPIO interrupts. They have serial #'s starting - * with 128, rather than 112. - */ - if (dd->ipath_serial[0] == '1' && - dd->ipath_serial[1] == '2' && - dd->ipath_serial[2] == '8') - dd->ipath_flags |= IPATH_GPIO_INTR; - else { - ipath_dev_err(dd, "Unsupported InfiniPath board " - "(serial number %.16s)!\n", - dd->ipath_serial); - return 1; - } - } - - if (dd->ipath_minrev >= 4) { - /* Rev4+ reports extra errors via internal GPIO pins */ - dd->ipath_flags |= IPATH_GPIO_ERRINTRS; - dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK; - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - } - - return 0; -} - - -/** - * ipath_init_ht_get_base_info - set chip-specific flags for user code - * @dd: the infinipath device - * @kbase: ipath_base_info pointer - * - * We set the PCIE flag because the lower bandwidth on PCIe vs - * HyperTransport can affect some user packet algorithms. - */ -static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) -{ - struct ipath_base_info *kinfo = kbase; - - kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT | - IPATH_RUNTIME_PIO_REGSWAPPED; - - if (pd->port_dd->ipath_minrev < 4) - kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY; - - return 0; -} - -static void ipath_ht_free_irq(struct ipath_devdata *dd) -{ - free_irq(dd->ipath_irq, dd); - ht_destroy_irq(dd->ipath_irq); - dd->ipath_irq = 0; - dd->ipath_intconfig = 0; -} - -static struct ipath_message_header * -ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) -{ - return (struct ipath_message_header *) - &rhf_addr[sizeof(u64) / sizeof(u32)]; -} - -static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports) -{ - dd->ipath_portcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); - dd->ipath_p0_rcvegrcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); -} - -static void ipath_ht_read_counters(struct ipath_devdata *dd, - struct infinipath_counters *cntrs) -{ - cntrs->LBIntCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); - cntrs->LBFlowStallCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); - cntrs->TxSDmaDescCnt = 0; - cntrs->TxUnsupVLErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); - cntrs->TxDataPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); - cntrs->TxFlowPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); - cntrs->TxDwordCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); - cntrs->TxLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); - cntrs->TxMaxMinLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); - cntrs->TxUnderrunCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); - cntrs->TxFlowStallCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); - cntrs->TxDroppedPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); - cntrs->RxDroppedPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); - cntrs->RxDataPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); - cntrs->RxFlowPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); - cntrs->RxDwordCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); - cntrs->RxLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); - cntrs->RxMaxMinLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); - cntrs->RxICRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); - cntrs->RxVCRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); - cntrs->RxFlowCtrlErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); - cntrs->RxBadFormatCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); - cntrs->RxLinkProblemCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); - cntrs->RxEBPCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); - cntrs->RxLPCRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); - cntrs->RxBufOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); - cntrs->RxTIDFullErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); - cntrs->RxTIDValidErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); - cntrs->RxPKeyMismatchCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); - cntrs->RxP0HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); - cntrs->RxP1HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); - cntrs->RxP2HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); - cntrs->RxP3HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); - cntrs->RxP4HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); - cntrs->RxP5HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt)); - cntrs->RxP6HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt)); - cntrs->RxP7HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt)); - cntrs->RxP8HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt)); - cntrs->RxP9HdrEgrOvflCnt = 0; - cntrs->RxP10HdrEgrOvflCnt = 0; - cntrs->RxP11HdrEgrOvflCnt = 0; - cntrs->RxP12HdrEgrOvflCnt = 0; - cntrs->RxP13HdrEgrOvflCnt = 0; - cntrs->RxP14HdrEgrOvflCnt = 0; - cntrs->RxP15HdrEgrOvflCnt = 0; - cntrs->RxP16HdrEgrOvflCnt = 0; - cntrs->IBStatusChangeCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); - cntrs->IBLinkErrRecoveryCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); - cntrs->IBLinkDownedCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); - cntrs->IBSymbolErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); - cntrs->RxVL15DroppedPktCnt = 0; - cntrs->RxOtherLocalPhyErrCnt = 0; - cntrs->PcieRetryBufDiagQwordCnt = 0; - cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; - cntrs->LocalLinkIntegrityErrCnt = - (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? - dd->ipath_lli_errs : dd->ipath_lli_errors; - cntrs->RxVlErrCnt = 0; - cntrs->RxDlidFltrCnt = 0; -} - - -/* no interrupt fallback for these chips */ -static int ipath_ht_nointr_fallback(struct ipath_devdata *dd) -{ - return 0; -} - - -/* - * reset the XGXS (between serdes and IBC). Slightly less intrusive - * than resetting the IBC or external link state, and useful in some - * cases to cause some retraining. To do this right, we reset IBC - * as well. - */ -static void ipath_ht_xgxs_reset(struct ipath_devdata *dd) -{ - u64 val, prev_val; - - prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val = prev_val | INFINIPATH_XGXS_RESET; - prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control & ~INFINIPATH_C_LINKENABLE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); -} - - -static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which) -{ - int ret; - - switch (which) { - case IPATH_IB_CFG_LWID: - ret = dd->ipath_link_width_active; - break; - case IPATH_IB_CFG_SPD: - ret = dd->ipath_link_speed_active; - break; - case IPATH_IB_CFG_LWID_ENB: - ret = dd->ipath_link_width_enabled; - break; - case IPATH_IB_CFG_SPD_ENB: - ret = dd->ipath_link_speed_enabled; - break; - default: - ret = -ENOTSUPP; - break; - } - return ret; -} - - -/* we assume range checking is already done, if needed */ -static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) -{ - int ret = 0; - - if (which == IPATH_IB_CFG_LWID_ENB) - dd->ipath_link_width_enabled = val; - else if (which == IPATH_IB_CFG_SPD_ENB) - dd->ipath_link_speed_enabled = val; - else - ret = -ENOTSUPP; - return ret; -} - - -static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b) -{ -} - - -static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) -{ - ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs), - ipath_ib_linktrstate(dd, ibcs)); - return 0; -} - - -/** - * ipath_init_iba6110_funcs - set up the chip-specific function pointers - * @dd: the infinipath device - * - * This is global, and is called directly at init to set up the - * chip-specific function pointers for later use. - */ -void ipath_init_iba6110_funcs(struct ipath_devdata *dd) -{ - dd->ipath_f_intrsetup = ipath_ht_intconfig; - dd->ipath_f_bus = ipath_setup_ht_config; - dd->ipath_f_reset = ipath_setup_ht_reset; - dd->ipath_f_get_boardname = ipath_ht_boardname; - dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors; - dd->ipath_f_early_init = ipath_ht_early_init; - dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors; - dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes; - dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes; - dd->ipath_f_clear_tids = ipath_ht_clear_tids; - dd->ipath_f_put_tid = ipath_ht_put_tid; - dd->ipath_f_cleanup = ipath_setup_ht_cleanup; - dd->ipath_f_setextled = ipath_setup_ht_setextled; - dd->ipath_f_get_base_info = ipath_ht_get_base_info; - dd->ipath_f_free_irq = ipath_ht_free_irq; - dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; - dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback; - dd->ipath_f_get_msgheader = ipath_ht_get_msgheader; - dd->ipath_f_config_ports = ipath_ht_config_ports; - dd->ipath_f_read_counters = ipath_ht_read_counters; - dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset; - dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg; - dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg; - dd->ipath_f_config_jint = ipath_ht_config_jint; - dd->ipath_f_ib_updown = ipath_ht_ib_updown; - - /* - * initialize chip-specific variables - */ - ipath_init_ht_variables(dd); -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_init_chip.c b/kernel/drivers/infiniband/hw/ipath/ipath_init_chip.c deleted file mode 100644 index be2a60e14..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/pci.h> -#include <linux/netdevice.h> -#include <linux/moduleparam.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/vmalloc.h> - -#include "ipath_kernel.h" -#include "ipath_common.h" - -/* - * min buffers we want to have per port, after driver - */ -#define IPATH_MIN_USER_PORT_BUFCNT 7 - -/* - * Number of ports we are configured to use (to allow for more pio - * buffers per port, etc.) Zero means use chip value. - */ -static ushort ipath_cfgports; - -module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); -MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); - -/* - * Number of buffers reserved for driver (verbs and layered drivers.) - * Initialized based on number of PIO buffers if not set via module interface. - * The problem with this is that it's global, but we'll use different - * numbers for different chip types. - */ -static ushort ipath_kpiobufs; - -static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp); - -module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort, - &ipath_kpiobufs, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); - -/** - * create_port0_egr - allocate the eager TID buffers - * @dd: the infinipath device - * - * This code is now quite different for user and kernel, because - * the kernel uses skb's, for the accelerated network performance. - * This is the kernel (port0) version. - * - * Allocate the eager TID buffers and program them into infinipath. - * We use the network layer alloc_skb() allocator to allocate the - * memory, and either use the buffers as is for things like verbs - * packets, or pass the buffers up to the ipath layered driver and - * thence the network layer, replacing them as we do so (see - * ipath_rcv_layer()). - */ -static int create_port0_egr(struct ipath_devdata *dd) -{ - unsigned e, egrcnt; - struct ipath_skbinfo *skbinfo; - int ret; - - egrcnt = dd->ipath_p0_rcvegrcnt; - - skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt); - if (skbinfo == NULL) { - ipath_dev_err(dd, "allocation error for eager TID " - "skb array\n"); - ret = -ENOMEM; - goto bail; - } - for (e = 0; e < egrcnt; e++) { - /* - * This is a bit tricky in that we allocate extra - * space for 2 bytes of the 14 byte ethernet header. - * These two bytes are passed in the ipath header so - * the rest of the data is word aligned. We allocate - * 4 bytes so that the data buffer stays word aligned. - * See ipath_kreceive() for more details. - */ - skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL); - if (!skbinfo[e].skb) { - ipath_dev_err(dd, "SKB allocation error for " - "eager TID %u\n", e); - while (e != 0) - dev_kfree_skb(skbinfo[--e].skb); - vfree(skbinfo); - ret = -ENOMEM; - goto bail; - } - } - /* - * After loop above, so we can test non-NULL to see if ready - * to use at receive, etc. - */ - dd->ipath_port0_skbinfo = skbinfo; - - for (e = 0; e < egrcnt; e++) { - dd->ipath_port0_skbinfo[e].phys = - ipath_map_single(dd->pcidev, - dd->ipath_port0_skbinfo[e].skb->data, - dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); - dd->ipath_f_put_tid(dd, e + (u64 __iomem *) - ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), - RCVHQ_RCV_TYPE_EAGER, - dd->ipath_port0_skbinfo[e].phys); - } - - ret = 0; - -bail: - return ret; -} - -static int bringup_link(struct ipath_devdata *dd) -{ - u64 val, ibc; - int ret = 0; - - /* hold IBC in reset */ - dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); - - /* - * set initial max size pkt IBC will send, including ICRC; it's the - * PIO buffer size in dwords, less 1; also see ipath_set_mtu() - */ - val = (dd->ipath_ibmaxlen >> 2) + 1; - ibc = val << dd->ibcc_mpl_shift; - - /* flowcontrolwatermark is in units of KBytes */ - ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT; - /* - * How often flowctrl sent. More or less in usecs; balance against - * watermark value, so that in theory senders always get a flow - * control update in time to not let the IB link go idle. - */ - ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT; - /* max error tolerance */ - ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - /* use "real" buffer space for */ - ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT; - /* IB credit flow control. */ - ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - /* initially come up waiting for TS1, without sending anything. */ - dd->ipath_ibcctrl = ibc; - /* - * Want to start out with both LINKCMD and LINKINITCMD in NOP - * (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that - * to stay a NOP. Flag that we are disabled, for the (unlikely) - * case that some recovery path is trying to bring the link up - * before we are ready. - */ - ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT; - dd->ipath_flags |= IPATH_IB_LINK_DISABLED; - ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n", - (unsigned long long) ibc); - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc); - - // be sure chip saw it - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - ret = dd->ipath_f_bringup_serdes(dd); - - if (ret) - dev_info(&dd->pcidev->dev, "Could not initialize SerDes, " - "not usable\n"); - else { - /* enable IBC */ - dd->ipath_control |= INFINIPATH_C_LINKENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); - } - - return ret; -} - -static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) -{ - struct ipath_portdata *pd = NULL; - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (pd) { - pd->port_dd = dd; - pd->port_cnt = 1; - /* The port 0 pkey table is used by the layer interface. */ - pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; - pd->port_seq_cnt = 1; - } - return pd; -} - -static int init_chip_first(struct ipath_devdata *dd) -{ - struct ipath_portdata *pd; - int ret = 0; - u64 val; - - spin_lock_init(&dd->ipath_kernel_tid_lock); - spin_lock_init(&dd->ipath_user_tid_lock); - spin_lock_init(&dd->ipath_sendctrl_lock); - spin_lock_init(&dd->ipath_uctxt_lock); - spin_lock_init(&dd->ipath_sdma_lock); - spin_lock_init(&dd->ipath_gpio_lock); - spin_lock_init(&dd->ipath_eep_st_lock); - spin_lock_init(&dd->ipath_sdepb_lock); - mutex_init(&dd->ipath_eep_lock); - - /* - * skip cfgports stuff because we are not allocating memory, - * and we don't want problems if the portcnt changed due to - * cfgports. We do still check and report a difference, if - * not same (should be impossible). - */ - dd->ipath_f_config_ports(dd, ipath_cfgports); - if (!ipath_cfgports) - dd->ipath_cfgports = dd->ipath_portcnt; - else if (ipath_cfgports <= dd->ipath_portcnt) { - dd->ipath_cfgports = ipath_cfgports; - ipath_dbg("Configured to use %u ports out of %u in chip\n", - dd->ipath_cfgports, ipath_read_kreg32(dd, - dd->ipath_kregs->kr_portcnt)); - } else { - dd->ipath_cfgports = dd->ipath_portcnt; - ipath_dbg("Tried to configured to use %u ports; chip " - "only supports %u\n", ipath_cfgports, - ipath_read_kreg32(dd, - dd->ipath_kregs->kr_portcnt)); - } - /* - * Allocate full portcnt array, rather than just cfgports, because - * cleanup iterates across all possible ports. - */ - dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt, - GFP_KERNEL); - - if (!dd->ipath_pd) { - ipath_dev_err(dd, "Unable to allocate portdata array, " - "failing\n"); - ret = -ENOMEM; - goto done; - } - - pd = create_portdata0(dd); - if (!pd) { - ipath_dev_err(dd, "Unable to allocate portdata for port " - "0, failing\n"); - ret = -ENOMEM; - goto done; - } - dd->ipath_pd[0] = pd; - - dd->ipath_rcvtidcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); - dd->ipath_rcvtidbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); - dd->ipath_rcvegrcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); - dd->ipath_rcvegrbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); - dd->ipath_palign = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); - dd->ipath_piobufbase = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase); - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); - dd->ipath_piosize2k = val & ~0U; - dd->ipath_piosize4k = val >> 32; - if (dd->ipath_piosize4k == 0 && ipath_mtu4096) - ipath_mtu4096 = 0; /* 4KB not supported by this chip */ - dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); - dd->ipath_piobcnt2k = val & ~0U; - dd->ipath_piobcnt4k = val >> 32; - dd->ipath_pio2kbase = - (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) + - (dd->ipath_piobufbase & 0xffffffff)); - if (dd->ipath_piobcnt4k) { - dd->ipath_pio4kbase = (u32 __iomem *) - (((char __iomem *) dd->ipath_kregbase) + - (dd->ipath_piobufbase >> 32)); - /* - * 4K buffers take 2 pages; we use roundup just to be - * paranoid; we calculate it once here, rather than on - * ever buf allocate - */ - dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k, - dd->ipath_palign); - ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p " - "(%x aligned)\n", - dd->ipath_piobcnt2k, dd->ipath_piosize2k, - dd->ipath_pio2kbase, dd->ipath_piobcnt4k, - dd->ipath_piosize4k, dd->ipath_pio4kbase, - dd->ipath_4kalign); - } - else ipath_dbg("%u 2k piobufs @ %p\n", - dd->ipath_piobcnt2k, dd->ipath_pio2kbase); - -done: - return ret; -} - -/** - * init_chip_reset - re-initialize after a reset, or enable - * @dd: the infinipath device - * - * sanity check at least some of the values after reset, and - * ensure no receive or transmit (explicitly, in case reset - * failed - */ -static int init_chip_reset(struct ipath_devdata *dd) -{ - u32 rtmp; - int i; - unsigned long flags; - - /* - * ensure chip does no sends or receives, tail updates, or - * pioavail updates while we re-initialize - */ - dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift); - for (i = 0; i < dd->ipath_portcnt; i++) { - clear_bit(dd->ipath_r_portenable_shift + i, - &dd->ipath_rcvctrl); - clear_bit(dd->ipath_r_intravail_shift + i, - &dd->ipath_rcvctrl); - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl = 0U; /* no sdma, etc */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); - if (rtmp != dd->ipath_rcvtidcnt) - dev_info(&dd->pcidev->dev, "tidcnt was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvtidcnt, rtmp); - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); - if (rtmp != dd->ipath_rcvtidbase) - dev_info(&dd->pcidev->dev, "tidbase was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvtidbase, rtmp); - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); - if (rtmp != dd->ipath_rcvegrcnt) - dev_info(&dd->pcidev->dev, "egrcnt was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvegrcnt, rtmp); - rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); - if (rtmp != dd->ipath_rcvegrbase) - dev_info(&dd->pcidev->dev, "egrbase was %u before " - "reset, now %u, using original\n", - dd->ipath_rcvegrbase, rtmp); - - return 0; -} - -static int init_pioavailregs(struct ipath_devdata *dd) -{ - int ret; - - dd->ipath_pioavailregs_dma = dma_alloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys, - GFP_KERNEL); - if (!dd->ipath_pioavailregs_dma) { - ipath_dev_err(dd, "failed to allocate PIOavail reg area " - "in memory\n"); - ret = -ENOMEM; - goto done; - } - - /* - * we really want L2 cache aligned, but for current CPUs of - * interest, they are the same. - */ - dd->ipath_statusp = (u64 *) - ((char *)dd->ipath_pioavailregs_dma + - ((2 * L1_CACHE_BYTES + - dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); - /* copy the current value now that it's really allocated */ - *dd->ipath_statusp = dd->_ipath_status; - /* - * setup buffer to hold freeze msg, accessible to apps, - * following statusp - */ - dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1]; - /* and its length */ - dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]); - - ret = 0; - -done: - return ret; -} - -/** - * init_shadow_tids - allocate the shadow TID array - * @dd: the infinipath device - * - * allocate the shadow TID array, so we can ipath_munlock previous - * entries. It may make more sense to move the pageshadow to the - * port data structure, so we only allocate memory for ports actually - * in use, since we at 8k per port, now. - */ -static void init_shadow_tids(struct ipath_devdata *dd) -{ - struct page **pages; - dma_addr_t *addrs; - - pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); - if (!pages) { - ipath_dev_err(dd, "failed to allocate shadow page * " - "array, no expected sends!\n"); - dd->ipath_pageshadow = NULL; - return; - } - - addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(dma_addr_t)); - if (!addrs) { - ipath_dev_err(dd, "failed to allocate shadow dma handle " - "array, no expected sends!\n"); - vfree(pages); - dd->ipath_pageshadow = NULL; - return; - } - - dd->ipath_pageshadow = pages; - dd->ipath_physshadow = addrs; -} - -static void enable_chip(struct ipath_devdata *dd, int reinit) -{ - u32 val; - u64 rcvmask; - unsigned long flags; - int i; - - if (!reinit) - init_waitqueue_head(&ipath_state_wait); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - /* Enable PIO send, and update of PIOavail regs to memory. */ - dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | - INFINIPATH_S_PIOBUFAVAILUPD; - - /* - * Set the PIO avail update threshold to host memory - * on chips that support it. - */ - if (dd->ipath_pioupd_thresh) - dd->ipath_sendctrl |= dd->ipath_pioupd_thresh - << INFINIPATH_S_UPDTHRESH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* - * Enable kernel ports' receive and receive interrupt. - * Other ports done as user opens and inits them. - */ - rcvmask = 1ULL; - dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) | - (rcvmask << dd->ipath_r_intravail_shift); - if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) - dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - - /* - * now ready for use. this should be cleared whenever we - * detect a reset, or initiate one. - */ - dd->ipath_flags |= IPATH_INITTED; - - /* - * Init our shadow copies of head from tail values, - * and write head values to match. - */ - val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); - ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); - - /* Initialize so we interrupt on next packet received */ - ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off | - dd->ipath_pd[0]->port_head, 0); - - /* - * by now pioavail updates to memory should have occurred, so - * copy them into our working/shadow registers; this is in - * case something went wrong with abort, but mostly to get the - * initial values of the generation bit correct. - */ - for (i = 0; i < dd->ipath_pioavregs; i++) { - __le64 pioavail; - - /* - * Chip Errata bug 6641; even and odd qwords>3 are swapped. - */ - if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) - pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; - else - pioavail = dd->ipath_pioavailregs_dma[i]; - /* - * don't need to worry about ipath_pioavailkernel here - * because we will call ipath_chg_pioavailkernel() later - * in initialization, to busy out buffers as needed - */ - dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); - } - /* can get counters, stats, etc. */ - dd->ipath_flags |= IPATH_PRESENT; -} - -static int init_housekeeping(struct ipath_devdata *dd, int reinit) -{ - char boardn[40]; - int ret = 0; - - /* - * have to clear shadow copies of registers at init that are - * not otherwise set here, or all kinds of bizarre things - * happen with driver on chip reset - */ - dd->ipath_rcvhdrsize = 0; - - /* - * Don't clear ipath_flags as 8bit mode was set before - * entering this func. However, we do set the linkstate to - * unknown, so we can watch for a transition. - * PRESENT is set because we want register reads to work, - * and the kernel infrastructure saw it in config space; - * We clear it if we have failures. - */ - dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT; - dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED | - IPATH_LINKDOWN | IPATH_LINKINIT); - - ipath_cdbg(VERBOSE, "Try to read spc chip revision\n"); - dd->ipath_revision = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision); - - /* - * set up fundamental info we need to use the chip; we assume - * if the revision reg and these regs are OK, we don't need to - * special case the rest - */ - dd->ipath_sregbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase); - dd->ipath_cregbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase); - dd->ipath_uregbase = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase); - ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, " - "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase, - dd->ipath_uregbase, dd->ipath_cregbase); - if ((dd->ipath_revision & 0xffffffff) == 0xffffffff - || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff - || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff - || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) { - ipath_dev_err(dd, "Register read failures from chip, " - "giving up initialization\n"); - dd->ipath_flags &= ~IPATH_PRESENT; - ret = -ENODEV; - goto done; - } - - - /* clear diagctrl register, in case diags were running and crashed */ - ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); - - /* clear the initial reset flag, in case first driver load */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, - INFINIPATH_E_RESET); - - ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n", - (unsigned long long) dd->ipath_revision, - dd->ipath_pcirev); - - if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) & - INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) { - ipath_dev_err(dd, "Driver only handles version %d, " - "chip swversion is %d (%llx), failng\n", - IPATH_CHIP_SWVERSION, - (int)(dd->ipath_revision >> - INFINIPATH_R_SOFTWARE_SHIFT) & - INFINIPATH_R_SOFTWARE_MASK, - (unsigned long long) dd->ipath_revision); - ret = -ENOSYS; - goto done; - } - dd->ipath_majrev = (u8) ((dd->ipath_revision >> - INFINIPATH_R_CHIPREVMAJOR_SHIFT) & - INFINIPATH_R_CHIPREVMAJOR_MASK); - dd->ipath_minrev = (u8) ((dd->ipath_revision >> - INFINIPATH_R_CHIPREVMINOR_SHIFT) & - INFINIPATH_R_CHIPREVMINOR_MASK); - dd->ipath_boardrev = (u8) ((dd->ipath_revision >> - INFINIPATH_R_BOARDID_SHIFT) & - INFINIPATH_R_BOARDID_MASK); - - ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); - - snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), - "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " - "SW Compat %u\n", - IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, - (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & - INFINIPATH_R_ARCH_MASK, - dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev, - (unsigned)(dd->ipath_revision >> - INFINIPATH_R_SOFTWARE_SHIFT) & - INFINIPATH_R_SOFTWARE_MASK); - - ipath_dbg("%s", dd->ipath_boardversion); - - if (ret) - goto done; - - if (reinit) - ret = init_chip_reset(dd); - else - ret = init_chip_first(dd); - -done: - return ret; -} - -static void verify_interrupt(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - - if (!dd) - return; /* being torn down */ - - /* - * If we don't have any interrupts, let the user know and - * don't bother checking again. - */ - if (dd->ipath_int_counter == 0) { - if (!dd->ipath_f_intr_fallback(dd)) - dev_err(&dd->pcidev->dev, "No interrupts detected, " - "not usable.\n"); - else /* re-arm the timer to see if fallback works */ - mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2); - } else - ipath_cdbg(VERBOSE, "%u interrupts at timer check\n", - dd->ipath_int_counter); -} - -/** - * ipath_init_chip - do the actual initialization sequence on the chip - * @dd: the infinipath device - * @reinit: reinitializing, so don't allocate new memory - * - * Do the actual initialization sequence on the chip. This is done - * both from the init routine called from the PCI infrastructure, and - * when we reset the chip, or detect that it was reset internally, - * or it's administratively re-enabled. - * - * Memory allocation here and in called routines is only done in - * the first case (reinit == 0). We have to be careful, because even - * without memory allocation, we need to re-write all the chip registers - * TIDs, etc. after the reset or enable has completed. - */ -int ipath_init_chip(struct ipath_devdata *dd, int reinit) -{ - int ret = 0; - u32 kpiobufs, defkbufs; - u32 piobufs, uports; - u64 val; - struct ipath_portdata *pd; - gfp_t gfp_flags = GFP_USER | __GFP_COMP; - - ret = init_housekeeping(dd, reinit); - if (ret) - goto done; - - /* - * We could bump this to allow for full rcvegrcnt + rcvtidcnt, - * but then it no longer nicely fits power of two, and since - * we now use routines that backend onto __get_free_pages, the - * rest would be wasted. - */ - dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt, - dd->ipath_rcvhdrcnt); - - /* - * Set up the shadow copies of the piobufavail registers, - * which we compare against the chip registers for now, and - * the in memory DMA'ed copies of the registers. This has to - * be done early, before we calculate lastport, etc. - */ - piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* - * calc number of pioavail registers, and save it; we have 2 - * bits per buffer. - */ - dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) - / (sizeof(u64) * BITS_PER_BYTE / 2); - uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; - if (piobufs > 144) - defkbufs = 32 + dd->ipath_pioreserved; - else - defkbufs = 16 + dd->ipath_pioreserved; - - if (ipath_kpiobufs && (ipath_kpiobufs + - (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) { - int i = (int) piobufs - - (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); - if (i < 1) - i = 1; - dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " - "%d for kernel leaves too few for %d user ports " - "(%d each); using %u\n", ipath_kpiobufs, - piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); - /* - * shouldn't change ipath_kpiobufs, because could be - * different for different devices... - */ - kpiobufs = i; - } else if (ipath_kpiobufs) - kpiobufs = ipath_kpiobufs; - else - kpiobufs = defkbufs; - dd->ipath_lastport_piobuf = piobufs - kpiobufs; - dd->ipath_pbufsport = - uports ? dd->ipath_lastport_piobuf / uports : 0; - /* if not an even divisor, some user ports get extra buffers */ - dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf - - (dd->ipath_pbufsport * uports); - if (dd->ipath_ports_extrabuf) - ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to " - "ports <= %u\n", dd->ipath_pbufsport, - dd->ipath_ports_extrabuf); - dd->ipath_lastpioindex = 0; - dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; - /* ipath_pioavailshadow initialized earlier */ - ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " - "each for %u user ports\n", kpiobufs, - piobufs, dd->ipath_pbufsport, uports); - ret = dd->ipath_f_early_init(dd); - if (ret) { - ipath_dev_err(dd, "Early initialization failure\n"); - goto done; - } - - /* - * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be - * done after early_init. - */ - dd->ipath_hdrqlast = - dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize, - dd->ipath_rcvhdrentsize); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, - dd->ipath_rcvhdrsize); - - if (!reinit) { - ret = init_pioavailregs(dd); - init_shadow_tids(dd); - if (ret) - goto done; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, - dd->ipath_pioavailregs_phys); - - /* - * this is to detect s/w errors, which the h/w works around by - * ignoring the low 6 bits of address, if it wasn't aligned. - */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr); - if (val != dd->ipath_pioavailregs_phys) { - ipath_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->ipath_pioavailregs_phys, - (unsigned long long) val); - ret = -EINVAL; - goto done; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP); - - /* - * make sure we are not in freeze, and PIO send enabled, so - * writes to pbc happen - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - - /* - * before error clears, since we expect serdes pll errors during - * this, the first time after reset - */ - if (bringup_link(dd)) { - dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n"); - ret = -ENETDOWN; - goto done; - } - - /* - * clear any "expected" hwerrs from reset and/or initialization - * clear any that aren't enabled (at least this once), and then - * set the enable mask - */ - dd->ipath_f_init_hwerrors(dd); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - - /* clear all */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); - /* enable errors that are masked, at least this first time. */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - ~dd->ipath_maskederrs); - dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */ - dd->ipath_errormask = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); - /* clear any interrupts up to this point (ints still not enabled) */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); - - dd->ipath_f_tidtemplate(dd); - - /* - * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing - * re-init, the simplest way to handle this is to free - * existing, and re-allocate. - * Need to re-create rest of port 0 portdata as well. - */ - pd = dd->ipath_pd[0]; - if (reinit) { - struct ipath_portdata *npd; - - /* - * Alloc and init new ipath_portdata for port0, - * Then free old pd. Could lead to fragmentation, but also - * makes later support for hot-swap easier. - */ - npd = create_portdata0(dd); - if (npd) { - ipath_free_pddata(dd, pd); - dd->ipath_pd[0] = npd; - pd = npd; - } else { - ipath_dev_err(dd, "Unable to allocate portdata" - " for port 0, failing\n"); - ret = -ENOMEM; - goto done; - } - } - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = create_port0_egr(dd); - if (ret) { - ipath_dev_err(dd, "failed to allocate kernel port's " - "rcvhdrq and/or egr bufs\n"); - goto done; - } - else - enable_chip(dd, reinit); - - /* after enable_chip, so pioavailshadow setup */ - ipath_chg_pioavailkernel(dd, 0, piobufs, 1); - - /* - * Cancel any possible active sends from early driver load. - * Follows early_init because some chips have to initialize - * PIO buffers in early_init to avoid false parity errors. - * After enable and ipath_chg_pioavailkernel so we can safely - * enable pioavail updates and PIOENABLE; packets are now - * ready to go out. - */ - ipath_cancel_sends(dd, 1); - - if (!reinit) { - /* - * Used when we close a port, for DMA already in flight - * at close. - */ - dd->ipath_dummy_hdrq = dma_alloc_coherent( - &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size, - &dd->ipath_dummy_hdrq_phys, - gfp_flags); - if (!dd->ipath_dummy_hdrq) { - dev_info(&dd->pcidev->dev, - "Couldn't allocate 0x%lx bytes for dummy hdrq\n", - dd->ipath_pd[0]->port_rcvhdrq_size); - /* fallback to just 0'ing */ - dd->ipath_dummy_hdrq_phys = 0UL; - } - } - - /* - * cause retrigger of pending interrupts ignored during init, - * even if we had errors - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); - - if (!dd->ipath_stats_timer_active) { - /* - * first init, or after an admin disable/enable - * set up stats retrieval timer, even if we had errors - * in last portion of setup - */ - init_timer(&dd->ipath_stats_timer); - dd->ipath_stats_timer.function = ipath_get_faststats; - dd->ipath_stats_timer.data = (unsigned long) dd; - /* every 5 seconds; */ - dd->ipath_stats_timer.expires = jiffies + 5 * HZ; - /* takes ~16 seconds to overflow at full IB 4x bandwdith */ - add_timer(&dd->ipath_stats_timer); - dd->ipath_stats_timer_active = 1; - } - - /* Set up SendDMA if chip supports it */ - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ret = setup_sdma(dd); - - /* Set up HoL state */ - init_timer(&dd->ipath_hol_timer); - dd->ipath_hol_timer.function = ipath_hol_event; - dd->ipath_hol_timer.data = (unsigned long)dd; - dd->ipath_hol_state = IPATH_HOL_UP; - -done: - if (!ret) { - *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT; - if (!dd->ipath_f_intrsetup(dd)) { - /* now we can enable all interrupts from the chip */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, - -1LL); - /* force re-interrupt of any pending interrupts. */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, - 0ULL); - /* chip is usable; mark it as initialized */ - *dd->ipath_statusp |= IPATH_STATUS_INITTED; - - /* - * setup to verify we get an interrupt, and fallback - * to an alternate if necessary and possible - */ - if (!reinit) { - init_timer(&dd->ipath_intrchk_timer); - dd->ipath_intrchk_timer.function = - verify_interrupt; - dd->ipath_intrchk_timer.data = - (unsigned long) dd; - } - dd->ipath_intrchk_timer.expires = jiffies + HZ/2; - add_timer(&dd->ipath_intrchk_timer); - } else - ipath_dev_err(dd, "No interrupts enabled, couldn't " - "setup interrupt address\n"); - - if (dd->ipath_cfgports > ipath_stats.sps_nports) - /* - * sps_nports is a global, so, we set it to - * the highest number of ports of any of the - * chips we find; we never decrement it, at - * least for now. Since this might have changed - * over disable/enable or prior to reset, always - * do the check and potentially adjust. - */ - ipath_stats.sps_nports = dd->ipath_cfgports; - } else - ipath_dbg("Failed (%d) to initialize chip\n", ret); - - /* if ret is non-zero, we probably should do some cleanup - here... */ - return ret; -} - -static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) -{ - struct ipath_devdata *dd; - unsigned long flags; - unsigned short val; - int ret; - - ret = ipath_parse_ushort(str, &val); - - spin_lock_irqsave(&ipath_devs_lock, flags); - - if (ret < 0) - goto bail; - - if (val == 0) { - ret = -EINVAL; - goto bail; - } - - list_for_each_entry(dd, &ipath_dev_list, ipath_list) { - if (dd->ipath_kregbase) - continue; - if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * - IPATH_MIN_USER_PORT_BUFCNT))) - { - ipath_dev_err( - dd, - "Allocating %d PIO bufs for kernel leaves " - "too few for %d user ports (%d each)\n", - val, dd->ipath_cfgports - 1, - IPATH_MIN_USER_PORT_BUFCNT); - ret = -EINVAL; - goto bail; - } - dd->ipath_lastport_piobuf = - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; - } - - ipath_kpiobufs = val; - ret = 0; -bail: - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_intr.c b/kernel/drivers/infiniband/hw/ipath/ipath_intr.c deleted file mode 100644 index 01ba79279..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_intr.c +++ /dev/null @@ -1,1273 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/sched.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - - -/* - * Called when we might have an error that is specific to a particular - * PIO buffer, and may need to cancel that buffer, so it can be re-used. - */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd) -{ - u32 piobcnt; - unsigned long sbuf[4]; - /* - * it's possible that sendbuffererror could have bits set; might - * have already done this as a result of hardware error handling - */ - piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - /* read these before writing errorclear */ - sbuf[0] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror); - sbuf[1] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 1); - if (piobcnt > 128) - sbuf[2] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 2); - if (piobcnt > 192) - sbuf[3] = ipath_read_kreg64( - dd, dd->ipath_kregs->kr_sendbuffererror + 3); - else - sbuf[3] = 0; - - if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { - int i; - if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && - time_after(dd->ipath_lastcancel, jiffies)) { - __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, - "SendbufErrs %lx %lx", sbuf[0], - sbuf[1]); - if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) - printk(" %lx %lx ", sbuf[2], sbuf[3]); - printk("\n"); - } - - for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) - ipath_disarm_piobufs(dd, i, 1); - /* ignore armlaunch errs for a bit */ - dd->ipath_lastcancel = jiffies+3; - } -} - - -/* These are all rcv-related errors which we want to count for stats */ -#define E_SUM_PKTERRS \ - (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ - INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \ - INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \ - INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \ - INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \ - INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP) - -/* These are all send-related errors which we want to count for stats */ -#define E_SUM_ERRS \ - (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \ - INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ - INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \ - INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ - INFINIPATH_E_INVALIDADDR) - -/* - * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore - * errors not related to freeze and cancelling buffers. Can't ignore - * armlaunch because could get more while still cleaning up, and need - * to cancel those as they happen. - */ -#define E_SPKT_ERRS_IGNORE \ - (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ - INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \ - INFINIPATH_E_SPKTLEN) - -/* - * these are errors that can occur when the link changes state while - * a packet is being sent or received. This doesn't cover things - * like EBP or VCRC that can be the result of a sending having the - * link change state, so we receive a "known bad" packet. - */ -#define E_SUM_LINK_PKTERRS \ - (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ - INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ - INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \ - INFINIPATH_E_RUNEXPCHAR) - -static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) -{ - u64 ignore_this_time = 0; - - ipath_disarm_senderrbufs(dd); - if ((errs & E_SUM_LINK_PKTERRS) && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - /* - * This can happen when SMA is trying to bring the link - * up, but the IB link changes state at the "wrong" time. - * The IB logic then complains that the packet isn't - * valid. We don't want to confuse people, so we just - * don't print them, except at debug - */ - ipath_dbg("Ignoring packet errors %llx, because link not " - "ACTIVE\n", (unsigned long long) errs); - ignore_this_time = errs & E_SUM_LINK_PKTERRS; - } - - return ignore_this_time; -} - -/* generic hw error messages... */ -#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \ - { \ - .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \ - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \ - .msg = "TXE " #a " Memory Parity" \ - } -#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \ - { \ - .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \ - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \ - .msg = "RXE " #a " Memory Parity" \ - } - -static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = { - INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"), - INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"), - - INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF), - INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC), - INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO), - - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO), - INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO), -}; - -/** - * ipath_format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer - */ -static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) -{ - strlcat(msg, "[", msgl); - strlcat(msg, hwmsg, msgl); - strlcat(msg, "]", msgl); -} - -/** - * ipath_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length - */ -void ipath_format_hwerrors(u64 hwerrs, - const struct ipath_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, - char *msg, size_t msgl) -{ - int i; - const int glen = - ARRAY_SIZE(ipath_generic_hwerror_msgs); - - for (i=0; i<glen; i++) { - if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { - ipath_format_hwmsg(msg, msgl, - ipath_generic_hwerror_msgs[i].msg); - } - } - - for (i=0; i<nhwerrmsgs; i++) { - if (hwerrs & hwerrmsgs[i].mask) { - ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg); - } - } -} - -/* return the strings for the most common link states */ -static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs) -{ - char *ret; - u32 state; - - state = ipath_ib_state(dd, ibcs); - if (state == dd->ib_init) - ret = "Init"; - else if (state == dd->ib_arm) - ret = "Arm"; - else if (state == dd->ib_active) - ret = "Active"; - else - ret = "Down"; - return ret; -} - -void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev) -{ - struct ib_event event; - - event.device = &dd->verbs_dev->ibdev; - event.element.port_num = 1; - event.event = ev; - ib_dispatch_event(&event); -} - -static void handle_e_ibstatuschanged(struct ipath_devdata *dd, - ipath_err_t errs) -{ - u32 ltstate, lstate, ibstate, lastlstate; - u32 init = dd->ib_init; - u32 arm = dd->ib_arm; - u32 active = dd->ib_active; - const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - - lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */ - ibstate = ipath_ib_state(dd, ibcs); - /* linkstate at last interrupt */ - lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat); - ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */ - - /* - * Since going into a recovery state causes the link state to go - * down and since recovery is transitory, it is better if we "miss" - * ever seeing the link training state go into recovery (i.e., - * ignore this transition for link state special handling purposes) - * without even updating ipath_lastibcstat. - */ - if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) || - (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) || - (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE)) - goto done; - - /* - * if linkstate transitions into INIT from any of the various down - * states, or if it transitions from any of the up (INIT or better) - * states into any of the down states (except link recovery), then - * call the chip-specific code to take appropriate actions. - */ - if (lstate >= INFINIPATH_IBCS_L_STATE_INIT && - lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) { - /* transitioned to UP */ - if (dd->ipath_f_ib_updown(dd, 1, ibcs)) { - /* link came up, so we must no longer be disabled */ - dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; - ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n"); - goto skip_ibchange; /* chip-code handled */ - } - } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT || - (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) && - ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT && - ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { - int handled; - handled = dd->ipath_f_ib_updown(dd, 0, ibcs); - dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY; - if (handled) { - ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n"); - goto skip_ibchange; /* chip-code handled */ - } - } - - /* - * Significant enough to always print and get into logs, if it was - * unexpected. If it was a requested state change, we'll have - * already cleared the flags, so we won't print this warning - */ - if ((ibstate != arm && ibstate != active) && - (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) { - dev_info(&dd->pcidev->dev, "Link state changed from %s " - "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ? - "ARM" : "ACTIVE", ib_linkstate(dd, ibcs)); - } - - if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || - ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { - u32 lastlts; - lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); - /* - * Ignore cycling back and forth from Polling.Active to - * Polling.Quiet while waiting for the other end of the link - * to come up, except to try and decide if we are connected - * to a live IB device or not. We will cycle back and - * forth between them if no cable is plugged in, the other - * device is powered off or disabled, etc. - */ - if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || - lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { - if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) && - (++dd->ipath_ibpollcnt == 40)) { - dd->ipath_flags |= IPATH_NOCABLE; - *dd->ipath_statusp |= - IPATH_STATUS_IB_NOCABLE; - ipath_cdbg(LINKVERB, "Set NOCABLE\n"); - } - ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n", - ipath_ibcstatus_str[ltstate], ibstate); - goto skip_ibchange; - } - } - - dd->ipath_ibpollcnt = 0; /* not poll*, now */ - ipath_stats.sps_iblink++; - - if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) { - u64 linkrecov; - linkrecov = ipath_snap_cntr(dd, - dd->ipath_cregs->cr_iblinkerrrecovcnt); - if (linkrecov != dd->ipath_lastlinkrecov) { - ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", - (unsigned long long) ibcs, - ib_linkstate(dd, ibcs), - ipath_ibcstatus_str[ltstate], - (unsigned long long) linkrecov); - /* and no more until active again */ - dd->ipath_lastlinkrecov = 0; - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - goto skip_ibchange; - } - } - - if (ibstate == init || ibstate == arm || ibstate == active) { - *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE; - if (ibstate == init || ibstate == arm) { - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - if (dd->ipath_flags & IPATH_LINKACTIVE) - signal_ib_event(dd, IB_EVENT_PORT_ERR); - } - if (ibstate == arm) { - dd->ipath_flags |= IPATH_LINKARMED; - dd->ipath_flags &= ~(IPATH_LINKUNK | - IPATH_LINKINIT | IPATH_LINKDOWN | - IPATH_LINKACTIVE | IPATH_NOCABLE); - ipath_hol_down(dd); - } else if (ibstate == init) { - /* - * set INIT and DOWN. Down is checked by - * most of the other code, but INIT is - * useful to know in a few places. - */ - dd->ipath_flags |= IPATH_LINKINIT | - IPATH_LINKDOWN; - dd->ipath_flags &= ~(IPATH_LINKUNK | - IPATH_LINKARMED | IPATH_LINKACTIVE | - IPATH_NOCABLE); - ipath_hol_down(dd); - } else { /* active */ - dd->ipath_lastlinkrecov = ipath_snap_cntr(dd, - dd->ipath_cregs->cr_iblinkerrrecovcnt); - *dd->ipath_statusp |= - IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; - dd->ipath_flags |= IPATH_LINKACTIVE; - dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT - | IPATH_LINKDOWN | IPATH_LINKARMED | - IPATH_NOCABLE); - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ipath_restart_sdma(dd); - signal_ib_event(dd, IB_EVENT_PORT_ACTIVE); - /* LED active not handled in chip _f_updown */ - dd->ipath_f_setextled(dd, lstate, ltstate); - ipath_hol_up(dd); - } - - /* - * print after we've already done the work, so as not to - * delay the state changes and notifications, for debugging - */ - if (lstate == lastlstate) - ipath_cdbg(LINKVERB, "Unchanged from last: %s " - "(%x)\n", ib_linkstate(dd, ibcs), ibstate); - else - ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n", - dd->ipath_unit, ib_linkstate(dd, ibcs), - ipath_ibcstatus_str[ltstate], ibstate); - } else { /* down */ - if (dd->ipath_flags & IPATH_LINKACTIVE) - signal_ib_event(dd, IB_EVENT_PORT_ERR); - dd->ipath_flags |= IPATH_LINKDOWN; - dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT - | IPATH_LINKACTIVE | - IPATH_LINKARMED); - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - dd->ipath_lli_counter = 0; - - if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN) - ipath_cdbg(VERBOSE, "Unit %u link state down " - "(state 0x%x), from %s\n", - dd->ipath_unit, lstate, - ib_linkstate(dd, dd->ipath_lastibcstat)); - else - ipath_cdbg(LINKVERB, "Unit %u link state changed " - "to %s (0x%x) from down (%x)\n", - dd->ipath_unit, - ipath_ibcstatus_str[ltstate], - ibstate, lastlstate); - } - -skip_ibchange: - dd->ipath_lastibcstat = ibcs; -done: - return; -} - -static void handle_supp_msgs(struct ipath_devdata *dd, - unsigned supp_msgs, char *msg, u32 msgsz) -{ - /* - * Print the message unless it's ibc status change only, which - * happens so often we never want to count it. - */ - if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { - int iserr; - ipath_err_t mask; - iserr = ipath_decode_err(dd, msg, msgsz, - dd->ipath_lasterror & - ~INFINIPATH_E_IBSTATUSCHANGED); - - mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED; - - /* if we're in debug, then don't mask SDMADISABLED msgs */ - if (ipath_debug & __IPATH_DBG) - mask &= ~INFINIPATH_E_SDMADISABLED; - - if (dd->ipath_lasterror & ~mask) - ipath_dev_err(dd, "Suppressed %u messages for " - "fast-repeating errors (%s) (%llx)\n", - supp_msgs, msg, - (unsigned long long) - dd->ipath_lasterror); - else { - /* - * rcvegrfull and rcvhdrqfull are "normal", for some - * types of processes (mostly benchmarks) that send - * huge numbers of messages, while not processing - * them. So only complain about these at debug - * level. - */ - if (iserr) - ipath_dbg("Suppressed %u messages for %s\n", - supp_msgs, msg); - else - ipath_cdbg(ERRPKT, - "Suppressed %u messages for %s\n", - supp_msgs, msg); - } - } -} - -static unsigned handle_frequent_errors(struct ipath_devdata *dd, - ipath_err_t errs, char *msg, - u32 msgsz, int *noprint) -{ - unsigned long nc; - static unsigned long nextmsg_time; - static unsigned nmsgs, supp_msgs; - - /* - * Throttle back "fast" messages to no more than 10 per 5 seconds. - * This isn't perfect, but it's a reasonable heuristic. If we get - * more than 10, give a 6x longer delay. - */ - nc = jiffies; - if (nmsgs > 10) { - if (time_before(nc, nextmsg_time)) { - *noprint = 1; - if (!supp_msgs++) - nextmsg_time = nc + HZ * 3; - } - else if (supp_msgs) { - handle_supp_msgs(dd, supp_msgs, msg, msgsz); - supp_msgs = 0; - nmsgs = 0; - } - } - else if (!nmsgs++ || time_after(nc, nextmsg_time)) - nextmsg_time = nc + HZ / 2; - - return supp_msgs; -} - -static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs) -{ - unsigned long flags; - int expected; - - if (ipath_debug & __IPATH_DBG) { - char msg[128]; - ipath_decode_err(dd, msg, sizeof msg, errs & - INFINIPATH_E_SDMAERRS); - ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg); - } - if (ipath_debug & __IPATH_VERBDBG) { - unsigned long tl, hd, status, lengen; - tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); - hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); - status = ipath_read_kreg64(dd - , dd->ipath_kregs->kr_senddmastatus); - lengen = ipath_read_kreg64(dd, - dd->ipath_kregs->kr_senddmalengen); - ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx " - "lengen 0x%lx\n", tl, hd, status, lengen); - } - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!expected) - ipath_cancel_sends(dd, 1); -} - -static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) -{ - unsigned long flags; - int expected; - - if ((istat & INFINIPATH_I_SDMAINT) && - !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - ipath_sdma_intr(dd); - - if (istat & INFINIPATH_I_SDMADISABLED) { - expected = test_bit(IPATH_SDMA_ABORTING, - &dd->ipath_sdma_status); - ipath_dbg("%s SDmaDisabled intr\n", - expected ? "expected" : "unexpected"); - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!expected) - ipath_cancel_sends(dd, 1); - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - tasklet_hi_schedule(&dd->ipath_sdma_abort_task); - } -} - -static int handle_hdrq_full(struct ipath_devdata *dd) -{ - int chkerrpkts = 0; - u32 hd, tl; - u32 i; - - ipath_stats.sps_hdrqfull++; - for (i = 0; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (i == 0) { - /* - * For kernel receive queues, we just want to know - * if there are packets in the queue that we can - * process. - */ - if (pd->port_head != ipath_get_hdrqtail(pd)) - chkerrpkts |= 1 << i; - continue; - } - - /* Skip if user context is not open */ - if (!pd || !pd->port_cnt) - continue; - - /* Don't report the same point multiple times. */ - if (dd->ipath_flags & IPATH_NODMA_RTAIL) - tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i); - else - tl = ipath_get_rcvhdrtail(pd); - if (tl == pd->port_lastrcvhdrqtail) - continue; - - hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); - if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) { - pd->port_lastrcvhdrqtail = tl; - pd->port_hdrqfull++; - /* flush hdrqfull so that poll() sees it */ - wmb(); - wake_up_interruptible(&pd->port_wait); - } - } - - return chkerrpkts; -} - -static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) -{ - char msg[128]; - u64 ignore_this_time = 0; - u64 iserr = 0; - int chkerrpkts = 0, noprint = 0; - unsigned supp_msgs; - int log_idx; - - /* - * don't report errors that are masked, either at init - * (not set in ipath_errormask), or temporarily (set in - * ipath_maskederrs) - */ - errs &= dd->ipath_errormask & ~dd->ipath_maskederrs; - - supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg, - &noprint); - - /* do these first, they are most important */ - if (errs & INFINIPATH_E_HARDWARE) { - /* reuse same msg buf */ - dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); - } else { - u64 mask; - for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) { - mask = dd->ipath_eep_st_masks[log_idx].errs_to_log; - if (errs & mask) - ipath_inc_eeprom_err(dd, log_idx, 1); - } - } - - if (errs & INFINIPATH_E_SDMAERRS) - handle_sdma_errors(dd, errs); - - if (!noprint && (errs & ~dd->ipath_e_bitsextant)) - ipath_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (errs & ~dd->ipath_e_bitsextant)); - - if (errs & E_SUM_ERRS) - ignore_this_time = handle_e_sum_errs(dd, errs); - else if ((errs & E_SUM_LINK_PKTERRS) && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - /* - * This can happen when SMA is trying to bring the link - * up, but the IB link changes state at the "wrong" time. - * The IB logic then complains that the packet isn't - * valid. We don't want to confuse people, so we just - * don't print them, except at debug - */ - ipath_dbg("Ignoring packet errors %llx, because link not " - "ACTIVE\n", (unsigned long long) errs); - ignore_this_time = errs & E_SUM_LINK_PKTERRS; - } - - if (supp_msgs == 250000) { - int s_iserr; - /* - * It's not entirely reasonable assuming that the errors set - * in the last clear period are all responsible for the - * problem, but the alternative is to assume it's the only - * ones on this particular interrupt, which also isn't great - */ - dd->ipath_maskederrs |= dd->ipath_lasterror | errs; - - dd->ipath_errormask &= ~dd->ipath_maskederrs; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - s_iserr = ipath_decode_err(dd, msg, sizeof msg, - dd->ipath_maskederrs); - - if (dd->ipath_maskederrs & - ~(INFINIPATH_E_RRCVEGRFULL | - INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) - ipath_dev_err(dd, "Temporarily disabling " - "error(s) %llx reporting; too frequent (%s)\n", - (unsigned long long) dd->ipath_maskederrs, - msg); - else { - /* - * rcvegrfull and rcvhdrqfull are "normal", - * for some types of processes (mostly benchmarks) - * that send huge numbers of messages, while not - * processing them. So only complain about - * these at debug level. - */ - if (s_iserr) - ipath_dbg("Temporarily disabling reporting " - "too frequent queue full errors (%s)\n", - msg); - else - ipath_cdbg(ERRPKT, - "Temporarily disabling reporting too" - " frequent packet errors (%s)\n", - msg); - } - - /* - * Re-enable the masked errors after around 3 minutes. in - * ipath_get_faststats(). If we have a series of fast - * repeating but different errors, the interval will keep - * stretching out, but that's OK, as that's pretty - * catastrophic. - */ - dd->ipath_unmasktime = jiffies + HZ * 180; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs); - if (ignore_this_time) - errs &= ~ignore_this_time; - if (errs & ~dd->ipath_lasterror) { - errs &= ~dd->ipath_lasterror; - /* never suppress duplicate hwerrors or ibstatuschange */ - dd->ipath_lasterror |= errs & - ~(INFINIPATH_E_HARDWARE | - INFINIPATH_E_IBSTATUSCHANGED); - } - - if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) { - dd->ipath_spectriggerhit++; - ipath_dbg("%lu special trigger hits\n", - dd->ipath_spectriggerhit); - } - - /* likely due to cancel; so suppress message unless verbose */ - if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && - time_after(dd->ipath_lastcancel, jiffies)) { - /* armlaunch takes precedence; it often causes both. */ - ipath_cdbg(VERBOSE, - "Suppressed %s error (%llx) after sendbuf cancel\n", - (errs & INFINIPATH_E_SPIOARMLAUNCH) ? - "armlaunch" : "sendpktlen", (unsigned long long)errs); - errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); - } - - if (!errs) - return 0; - - if (!noprint) { - ipath_err_t mask; - /* - * The ones we mask off are handled specially below - * or above. Also mask SDMADISABLED by default as it - * is too chatty. - */ - mask = INFINIPATH_E_IBSTATUSCHANGED | - INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED; - - /* if we're in debug, then don't mask SDMADISABLED msgs */ - if (ipath_debug & __IPATH_DBG) - mask &= ~INFINIPATH_E_SDMADISABLED; - - ipath_decode_err(dd, msg, sizeof msg, errs & ~mask); - } else - /* so we don't need if (!noprint) at strlcat's below */ - *msg = 0; - - if (errs & E_SUM_PKTERRS) { - ipath_stats.sps_pkterrs++; - chkerrpkts = 1; - } - if (errs & E_SUM_ERRS) - ipath_stats.sps_errs++; - - if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) { - ipath_stats.sps_crcerrs++; - chkerrpkts = 1; - } - iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS); - - - /* - * We don't want to print these two as they happen, or we can make - * the situation even worse, because it takes so long to print - * messages to serial consoles. Kernel ports get printed from - * fast_stats, no more than every 5 seconds, user ports get printed - * on close - */ - if (errs & INFINIPATH_E_RRCVHDRFULL) - chkerrpkts |= handle_hdrq_full(dd); - if (errs & INFINIPATH_E_RRCVEGRFULL) { - struct ipath_portdata *pd = dd->ipath_pd[0]; - - /* - * since this is of less importance and not likely to - * happen without also getting hdrfull, only count - * occurrences; don't check each port (or even the kernel - * vs user) - */ - ipath_stats.sps_etidfull++; - if (pd->port_head != ipath_get_hdrqtail(pd)) - chkerrpkts |= 1; - } - - /* - * do this before IBSTATUSCHANGED, in case both bits set in a single - * interrupt; we want the STATUSCHANGE to "win", so we do our - * internal copy of state machine correctly - */ - if (errs & INFINIPATH_E_RIBLOSTLINK) { - /* - * force through block below - */ - errs |= INFINIPATH_E_IBSTATUSCHANGED; - ipath_stats.sps_iblink++; - dd->ipath_flags |= IPATH_LINKDOWN; - dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT - | IPATH_LINKARMED | IPATH_LINKACTIVE); - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - - ipath_dbg("Lost link, link now down (%s)\n", - ipath_ibcstatus_str[ipath_read_kreg64(dd, - dd->ipath_kregs->kr_ibcstatus) & 0xf]); - } - if (errs & INFINIPATH_E_IBSTATUSCHANGED) - handle_e_ibstatuschanged(dd, errs); - - if (errs & INFINIPATH_E_RESET) { - if (!noprint) - ipath_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); - dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */ - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; - } - - if (!noprint && *msg) { - if (iserr) - ipath_dev_err(dd, "%s error\n", msg); - } - if (dd->ipath_state_wanted & dd->ipath_flags) { - ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " - "waking\n", dd->ipath_state_wanted, - dd->ipath_flags); - wake_up_interruptible(&ipath_state_wait); - } - - return chkerrpkts; -} - -/* - * try to cleanup as much as possible for anything that might have gone - * wrong while in freeze mode, such as pio buffers being written by user - * processes (causing armlaunch), send errors due to going into freeze mode, - * etc., and try to avoid causing extra interrupts while doing so. - * Forcibly update the in-memory pioavail register copies after cleanup - * because the chip won't do it while in freeze mode (the register values - * themselves are kept correct). - * Make sure that we don't lose any important interrupts by using the chip - * feature that says that writing 0 to a bit in *clear that is set in - * *status will cause an interrupt to be generated again (if allowed by - * the *mask value). - */ -void ipath_clear_freeze(struct ipath_devdata *dd) -{ - /* disable error interrupts, to avoid confusion */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); - - /* also disable interrupts; errormask is sometimes overwriten */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - - ipath_cancel_sends(dd, 1); - - /* clear the freeze, and be sure chip saw it */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - /* force in-memory update now we are out of freeze */ - ipath_force_pio_avail_update(dd); - - /* - * force new interrupt if any hwerr, error or interrupt bits are - * still set, and clear "safe" send packet errors related to freeze - * and cancelling sends. Re-enable error interrupts before possible - * force of re-interrupt on pending interrupts. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, - E_SPKT_ERRS_IGNORE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); -} - - -/* this is separate to allow for better optimization of ipath_intr() */ - -static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp) -{ - /* - * sometimes happen during driver init and unload, don't want - * to process any interrupts at that point - */ - - /* this is just a bandaid, not a fix, if something goes badly - * wrong */ - if (++*unexpectp > 100) { - if (++*unexpectp > 105) { - /* - * ok, we must be taking somebody else's interrupts, - * due to a messed up mptable and/or PIRQ table, so - * unregister the interrupt. We've seen this during - * linuxbios development work, and it may happen in - * the future again. - */ - if (dd->pcidev && dd->ipath_irq) { - ipath_dev_err(dd, "Now %u unexpected " - "interrupts, unregistering " - "interrupt handler\n", - *unexpectp); - ipath_dbg("free_irq of irq %d\n", - dd->ipath_irq); - dd->ipath_f_free_irq(dd); - } - } - if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) { - ipath_dev_err(dd, "%u unexpected interrupts, " - "disabling interrupts completely\n", - *unexpectp); - /* - * disable all interrupts, something is very wrong - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, - 0ULL); - } - } else if (*unexpectp > 1) - ipath_dbg("Interrupt when not ready, should not happen, " - "ignoring\n"); -} - -static noinline void ipath_bad_regread(struct ipath_devdata *dd) -{ - static int allbits; - - /* separate routine, for better optimization of ipath_intr() */ - - /* - * We print the message and disable interrupts, in hope of - * having a better chance of debugging the problem. - */ - ipath_dev_err(dd, - "Read of interrupt status failed (all bits set)\n"); - if (allbits++) { - /* disable all interrupts, something is very wrong */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - if (allbits == 2) { - ipath_dev_err(dd, "Still bad interrupt status, " - "unregistering interrupt\n"); - dd->ipath_f_free_irq(dd); - } else if (allbits > 2) { - if ((allbits % 10000) == 0) - printk("."); - } else - ipath_dev_err(dd, "Disabling interrupts, " - "multiple errors\n"); - } -} - -static void handle_layer_pioavail(struct ipath_devdata *dd) -{ - unsigned long flags; - int ret; - - ret = ipath_ib_piobufavail(dd->verbs_dev); - if (ret > 0) - goto set; - - return; -set: - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); -} - -/* - * Handle receive interrupts for user ports; this means a user - * process was waiting for a packet to arrive, and didn't want - * to poll - */ -static void handle_urcv(struct ipath_devdata *dd, u64 istat) -{ - u64 portr; - int i; - int rcvdint = 0; - - /* - * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and - * test_and_clear_bit(IPATH_PORT_WAITING_URG) below - * would both like timely updates of the bits so that - * we don't pass them by unnecessarily. the rmb() - * here ensures that we see them promptly -- the - * corresponding wmb()'s are in ipath_poll_urgent() - * and ipath_poll_next()... - */ - rmb(); - portr = ((istat >> dd->ipath_i_rcvavail_shift) & - dd->ipath_i_rcvavail_mask) | - ((istat >> dd->ipath_i_rcvurg_shift) & - dd->ipath_i_rcvurg_mask); - for (i = 1; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (portr & (1 << i) && pd && pd->port_cnt) { - if (test_and_clear_bit(IPATH_PORT_WAITING_RCV, - &pd->port_flag)) { - clear_bit(i + dd->ipath_r_intravail_shift, - &dd->ipath_rcvctrl); - wake_up_interruptible(&pd->port_wait); - rcvdint = 1; - } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG, - &pd->port_flag)) { - pd->port_urgent++; - wake_up_interruptible(&pd->port_wait); - } - } - } - if (rcvdint) { - /* only want to take one interrupt, so turn off the rcv - * interrupt for all the ports that we set the rcv_waiting - * (but never for kernel port) - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - } -} - -irqreturn_t ipath_intr(int irq, void *data) -{ - struct ipath_devdata *dd = data; - u64 istat, chk0rcv = 0; - ipath_err_t estat = 0; - irqreturn_t ret; - static unsigned unexpected = 0; - u64 kportrbits; - - ipath_stats.sps_ints++; - - if (dd->ipath_int_counter != (u32) -1) - dd->ipath_int_counter++; - - if (!(dd->ipath_flags & IPATH_PRESENT)) { - /* - * This return value is not great, but we do not want the - * interrupt core code to remove our interrupt handler - * because we don't appear to be handling an interrupt - * during a chip reset. - */ - return IRQ_HANDLED; - } - - /* - * this needs to be flags&initted, not statusp, so we keep - * taking interrupts even after link goes down, etc. - * Also, we *must* clear the interrupt at some point, or we won't - * take it again, which can be real bad for errors, etc... - */ - - if (!(dd->ipath_flags & IPATH_INITTED)) { - ipath_bad_intr(dd, &unexpected); - ret = IRQ_NONE; - goto bail; - } - - istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus); - - if (unlikely(!istat)) { - ipath_stats.sps_nullintr++; - ret = IRQ_NONE; /* not our interrupt, or already handled */ - goto bail; - } - if (unlikely(istat == -1)) { - ipath_bad_regread(dd); - /* don't know if it was our interrupt or not */ - ret = IRQ_NONE; - goto bail; - } - - if (unexpected) - unexpected = 0; - - if (unlikely(istat & ~dd->ipath_i_bitsextant)) - ipath_dev_err(dd, - "interrupt with unknown interrupts %Lx set\n", - (unsigned long long) - istat & ~dd->ipath_i_bitsextant); - else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ - ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", - (unsigned long long) istat); - - if (istat & INFINIPATH_I_ERROR) { - ipath_stats.sps_errints++; - estat = ipath_read_kreg64(dd, - dd->ipath_kregs->kr_errorstatus); - if (!estat) - dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " - "but no error bits set!\n", - (unsigned long long) istat); - else if (estat == -1LL) - /* - * should we try clearing all, or hope next read - * works? - */ - ipath_dev_err(dd, "Read of error status failed " - "(all bits set); ignoring\n"); - else - chk0rcv |= handle_errors(dd, estat); - } - - if (istat & INFINIPATH_I_GPIO) { - /* - * GPIO interrupts fall in two broad classes: - * GPIO_2 indicates (on some HT4xx boards) that a packet - * has arrived for Port 0. Checking for this - * is controlled by flag IPATH_GPIO_INTR. - * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate - * errors that we need to count. Checking for this - * is controlled by flag IPATH_GPIO_ERRINTRS. - */ - u32 gpiostatus; - u32 to_clear = 0; - - gpiostatus = ipath_read_kreg32( - dd, dd->ipath_kregs->kr_gpio_status); - /* First the error-counter case. */ - if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) && - (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) { - /* want to clear the bits we see asserted. */ - to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK); - - /* - * Count appropriately, clear bits out of our copy, - * as they have been "handled". - */ - if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) { - ipath_dbg("FlowCtl on UnsupVL\n"); - dd->ipath_rxfc_unsupvl_errs++; - } - if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) { - ipath_dbg("Overrun Threshold exceeded\n"); - dd->ipath_overrun_thresh_errs++; - } - if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) { - ipath_dbg("Local Link Integrity error\n"); - dd->ipath_lli_errs++; - } - gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK; - } - /* Now the Port0 Receive case */ - if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) && - (dd->ipath_flags & IPATH_GPIO_INTR)) { - /* - * GPIO status bit 2 is set, and we expected it. - * clear it and indicate in p0bits. - * This probably only happens if a Port0 pkt - * arrives at _just_ the wrong time, and we - * handle that by seting chk0rcv; - */ - to_clear |= (1 << IPATH_GPIO_PORT0_BIT); - gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); - chk0rcv = 1; - } - if (gpiostatus) { - /* - * Some unexpected bits remain. If they could have - * caused the interrupt, complain and clear. - * To avoid repetition of this condition, also clear - * the mask. It is almost certainly due to error. - */ - const u32 mask = (u32) dd->ipath_gpio_mask; - - if (mask & gpiostatus) { - ipath_dbg("Unexpected GPIO IRQ bits %x\n", - gpiostatus & mask); - to_clear |= (gpiostatus & mask); - dd->ipath_gpio_mask &= ~(gpiostatus & mask); - ipath_write_kreg(dd, - dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - } - } - if (to_clear) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) to_clear); - } - } - - /* - * Clear the interrupt bits we found set, unless they are receive - * related, in which case we already cleared them above, and don't - * want to clear them again, because we might lose an interrupt. - * Clear it early, so we "know" know the chip will have seen this by - * the time we process the queue, and will re-interrupt if necessary. - * The processor itself won't take the interrupt again until we return. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); - - /* - * Handle kernel receive queues before checking for pio buffers - * available since receives can overflow; piobuf waiters can afford - * a few extra cycles, since they were waiting anyway, and user's - * waiting for receive are at the bottom. - */ - kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) | - (1ULL << dd->ipath_i_rcvurg_shift); - if (chk0rcv || (istat & kportrbits)) { - istat &= ~kportrbits; - ipath_kreceive(dd->ipath_pd[0]); - } - - if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) | - (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift))) - handle_urcv(dd, istat); - - if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED)) - handle_sdma_intr(dd, istat); - - if (istat & INFINIPATH_I_SPIOBUFAVAIL) { - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* always process; sdma verbs uses PIO for acks and VL15 */ - handle_layer_pioavail(dd); - } - - ret = IRQ_HANDLED; - -bail: - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_kernel.h b/kernel/drivers/infiniband/hw/ipath/ipath_kernel.h deleted file mode 100644 index e08db7020..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_kernel.h +++ /dev/null @@ -1,1375 +0,0 @@ -#ifndef _IPATH_KERNEL_H -#define _IPATH_KERNEL_H -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This header file is the base header file for infinipath kernel code - * ipath_user.h serves a similar purpose for user code. - */ - -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/scatterlist.h> -#include <asm/io.h> -#include <rdma/ib_verbs.h> - -#include "ipath_common.h" -#include "ipath_debug.h" -#include "ipath_registers.h" - -/* only s/w major version of InfiniPath we can handle */ -#define IPATH_CHIP_VERS_MAJ 2U - -/* don't care about this except printing */ -#define IPATH_CHIP_VERS_MIN 0U - -/* temporary, maybe always */ -extern struct infinipath_stats ipath_stats; - -#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ -/* - * First-cut critierion for "device is active" is - * two thousand dwords combined Tx, Rx traffic per - * 5-second interval. SMA packets are 64 dwords, - * and occur "a few per second", presumably each way. - */ -#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000) -/* - * Struct used to indicate which errors are logged in each of the - * error-counters that are logged to EEPROM. A counter is incremented - * _once_ (saturating at 255) for each event with any bits set in - * the error or hwerror register masks below. - */ -#define IPATH_EEP_LOG_CNT (4) -struct ipath_eep_log_mask { - u64 errs_to_log; - u64 hwerrs_to_log; -}; - -struct ipath_portdata { - void **port_rcvegrbuf; - dma_addr_t *port_rcvegrbuf_phys; - /* rcvhdrq base, needs mmap before useful */ - void *port_rcvhdrq; - /* kernel virtual address where hdrqtail is updated */ - void *port_rcvhdrtail_kvaddr; - /* - * temp buffer for expected send setup, allocated at open, instead - * of each setup call - */ - void *port_tid_pg_list; - /* when waiting for rcv or pioavail */ - wait_queue_head_t port_wait; - /* - * rcvegr bufs base, physical, must fit - * in 44 bits so 32 bit programs mmap64 44 bit works) - */ - dma_addr_t port_rcvegr_phys; - /* mmap of hdrq, must fit in 44 bits */ - dma_addr_t port_rcvhdrq_phys; - dma_addr_t port_rcvhdrqtailaddr_phys; - /* - * number of opens (including slave subports) on this instance - * (ignoring forks, dup, etc. for now) - */ - int port_cnt; - /* - * how much space to leave at start of eager TID entries for - * protocol use, on each TID - */ - /* instead of calculating it */ - unsigned port_port; - /* non-zero if port is being shared. */ - u16 port_subport_cnt; - /* non-zero if port is being shared. */ - u16 port_subport_id; - /* number of pio bufs for this port (all procs, if shared) */ - u32 port_piocnt; - /* first pio buffer for this port */ - u32 port_pio_base; - /* chip offset of PIO buffers for this port */ - u32 port_piobufs; - /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ - u32 port_rcvegrbuf_chunks; - /* how many egrbufs per chunk */ - u32 port_rcvegrbufs_perchunk; - /* order for port_rcvegrbuf_pages */ - size_t port_rcvegrbuf_size; - /* rcvhdrq size (for freeing) */ - size_t port_rcvhdrq_size; - /* next expected TID to check when looking for free */ - u32 port_tidcursor; - /* next expected TID to check */ - unsigned long port_flag; - /* what happened */ - unsigned long int_flag; - /* WAIT_RCV that timed out, no interrupt */ - u32 port_rcvwait_to; - /* WAIT_PIO that timed out, no interrupt */ - u32 port_piowait_to; - /* WAIT_RCV already happened, no wait */ - u32 port_rcvnowait; - /* WAIT_PIO already happened, no wait */ - u32 port_pionowait; - /* total number of rcvhdrqfull errors */ - u32 port_hdrqfull; - /* - * Used to suppress multiple instances of same - * port staying stuck at same point. - */ - u32 port_lastrcvhdrqtail; - /* saved total number of rcvhdrqfull errors for poll edge trigger */ - u32 port_hdrqfull_poll; - /* total number of polled urgent packets */ - u32 port_urgent; - /* saved total number of polled urgent packets for poll edge trigger */ - u32 port_urgent_poll; - /* pid of process using this port */ - struct pid *port_pid; - struct pid *port_subpid[INFINIPATH_MAX_SUBPORT]; - /* same size as task_struct .comm[] */ - char port_comm[16]; - /* pkeys set by this use of this port */ - u16 port_pkeys[4]; - /* so file ops can get at unit */ - struct ipath_devdata *port_dd; - /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ - void *subport_uregbase; - /* An array of pages for the eager receive buffers * N */ - void *subport_rcvegrbuf; - /* An array of pages for the eager header queue entries * N */ - void *subport_rcvhdr_base; - /* The version of the library which opened this port */ - u32 userversion; - /* Bitmask of active slaves */ - u32 active_slaves; - /* Type of packets or conditions we want to poll for */ - u16 poll_type; - /* port rcvhdrq head offset */ - u32 port_head; - /* receive packet sequence counter */ - u32 port_seq_cnt; -}; - -struct sk_buff; -struct ipath_sge_state; -struct ipath_verbs_txreq; - -/* - * control information for layered drivers - */ -struct _ipath_layer { - void *l_arg; -}; - -struct ipath_skbinfo { - struct sk_buff *skb; - dma_addr_t phys; -}; - -struct ipath_sdma_txreq { - int flags; - int sg_count; - union { - struct scatterlist *sg; - void *map_addr; - }; - void (*callback)(void *, int); - void *callback_cookie; - int callback_status; - u16 start_idx; /* sdma private */ - u16 next_descq_idx; /* sdma private */ - struct list_head list; /* sdma private */ -}; - -struct ipath_sdma_desc { - __le64 qw[2]; -}; - -#define IPATH_SDMA_TXREQ_F_USELARGEBUF 0x1 -#define IPATH_SDMA_TXREQ_F_HEADTOHOST 0x2 -#define IPATH_SDMA_TXREQ_F_INTREQ 0x4 -#define IPATH_SDMA_TXREQ_F_FREEBUF 0x8 -#define IPATH_SDMA_TXREQ_F_FREEDESC 0x10 -#define IPATH_SDMA_TXREQ_F_VL15 0x20 - -#define IPATH_SDMA_TXREQ_S_OK 0 -#define IPATH_SDMA_TXREQ_S_SENDERROR 1 -#define IPATH_SDMA_TXREQ_S_ABORTED 2 -#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 - -#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) -#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) -#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) -#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) - -/* max dwords in small buffer packet */ -#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) - -/* - * Possible IB config parameters for ipath_f_get/set_ib_cfg() - */ -#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */ -#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */ -#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */ -#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */ -#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */ -#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */ -#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */ -#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */ -#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */ -#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */ -#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */ - - -struct ipath_devdata { - struct list_head ipath_list; - - struct ipath_kregs const *ipath_kregs; - struct ipath_cregs const *ipath_cregs; - - /* mem-mapped pointer to base of chip regs */ - u64 __iomem *ipath_kregbase; - /* end of mem-mapped chip space; range checking */ - u64 __iomem *ipath_kregend; - /* physical address of chip for io_remap, etc. */ - unsigned long ipath_physaddr; - /* base of memory alloced for ipath_kregbase, for free */ - u64 *ipath_kregalloc; - /* ipath_cfgports pointers */ - struct ipath_portdata **ipath_pd; - /* sk_buffs used by port 0 eager receive queue */ - struct ipath_skbinfo *ipath_port0_skbinfo; - /* kvirt address of 1st 2k pio buffer */ - void __iomem *ipath_pio2kbase; - /* kvirt address of 1st 4k pio buffer */ - void __iomem *ipath_pio4kbase; - /* - * points to area where PIOavail registers will be DMA'ed. - * Has to be on a page of it's own, because the page will be - * mapped into user program space. This copy is *ONLY* ever - * written by DMA, not by the driver! Need a copy per device - * when we get to multiple devices - */ - volatile __le64 *ipath_pioavailregs_dma; - /* physical address where updates occur */ - dma_addr_t ipath_pioavailregs_phys; - struct _ipath_layer ipath_layer; - /* setup intr */ - int (*ipath_f_intrsetup)(struct ipath_devdata *); - /* fallback to alternate interrupt type if possible */ - int (*ipath_f_intr_fallback)(struct ipath_devdata *); - /* setup on-chip bus config */ - int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); - /* hard reset chip */ - int (*ipath_f_reset)(struct ipath_devdata *); - int (*ipath_f_get_boardname)(struct ipath_devdata *, char *, - size_t); - void (*ipath_f_init_hwerrors)(struct ipath_devdata *); - void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *, - size_t); - void (*ipath_f_quiet_serdes)(struct ipath_devdata *); - int (*ipath_f_bringup_serdes)(struct ipath_devdata *); - int (*ipath_f_early_init)(struct ipath_devdata *); - void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned); - void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*, - u32, unsigned long); - void (*ipath_f_tidtemplate)(struct ipath_devdata *); - void (*ipath_f_cleanup)(struct ipath_devdata *); - void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); - /* fill out chip-specific fields */ - int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); - /* free irq */ - void (*ipath_f_free_irq)(struct ipath_devdata *); - struct ipath_message_header *(*ipath_f_get_msgheader) - (struct ipath_devdata *, __le32 *); - void (*ipath_f_config_ports)(struct ipath_devdata *, ushort); - int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int); - int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32); - void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16); - void (*ipath_f_read_counters)(struct ipath_devdata *, - struct infinipath_counters *); - void (*ipath_f_xgxs_reset)(struct ipath_devdata *); - /* per chip actions needed for IB Link up/down changes */ - int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64); - - unsigned ipath_lastegr_idx; - struct ipath_ibdev *verbs_dev; - struct timer_list verbs_timer; - /* total dwords sent (summed from counter) */ - u64 ipath_sword; - /* total dwords rcvd (summed from counter) */ - u64 ipath_rword; - /* total packets sent (summed from counter) */ - u64 ipath_spkts; - /* total packets rcvd (summed from counter) */ - u64 ipath_rpkts; - /* ipath_statusp initially points to this. */ - u64 _ipath_status; - /* GUID for this interface, in network order */ - __be64 ipath_guid; - /* - * aggregrate of error bits reported since last cleared, for - * limiting of error reporting - */ - ipath_err_t ipath_lasterror; - /* - * aggregrate of error bits reported since last cleared, for - * limiting of hwerror reporting - */ - ipath_err_t ipath_lasthwerror; - /* errors masked because they occur too fast */ - ipath_err_t ipath_maskederrs; - u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ - /* these 5 fields are used to establish deltas for IB Symbol - * errors and linkrecovery errors. They can be reported on - * some chips during link negotiation prior to INIT, and with - * DDR when faking DDR negotiations with non-IBTA switches. - * The chip counters are adjusted at driver unload if there is - * a non-zero delta. - */ - u64 ibdeltainprog; - u64 ibsymdelta; - u64 ibsymsnap; - u64 iblnkerrdelta; - u64 iblnkerrsnap; - - /* time in jiffies at which to re-enable maskederrs */ - unsigned long ipath_unmasktime; - /* count of egrfull errors, combined for all ports */ - u64 ipath_last_tidfull; - /* for ipath_qcheck() */ - u64 ipath_lastport0rcv_cnt; - /* template for writing TIDs */ - u64 ipath_tidtemplate; - /* value to write to free TIDs */ - u64 ipath_tidinvalid; - /* IBA6120 rcv interrupt setup */ - u64 ipath_rhdrhead_intr_off; - - /* size of memory at ipath_kregbase */ - u32 ipath_kregsize; - /* number of registers used for pioavail */ - u32 ipath_pioavregs; - /* IPATH_POLL, etc. */ - u32 ipath_flags; - /* ipath_flags driver is waiting for */ - u32 ipath_state_wanted; - /* last buffer for user use, first buf for kernel use is this - * index. */ - u32 ipath_lastport_piobuf; - /* is a stats timer active */ - u32 ipath_stats_timer_active; - /* number of interrupts for this device -- saturates... */ - u32 ipath_int_counter; - /* dwords sent read from counter */ - u32 ipath_lastsword; - /* dwords received read from counter */ - u32 ipath_lastrword; - /* sent packets read from counter */ - u32 ipath_lastspkts; - /* received packets read from counter */ - u32 ipath_lastrpkts; - /* pio bufs allocated per port */ - u32 ipath_pbufsport; - /* if remainder on bufs/port, ports < extrabuf get 1 extra */ - u32 ipath_ports_extrabuf; - u32 ipath_pioupd_thresh; /* update threshold, some chips */ - /* - * number of ports configured as max; zero is set to number chip - * supports, less gives more pio bufs/port, etc. - */ - u32 ipath_cfgports; - /* count of port 0 hdrqfull errors */ - u32 ipath_p0_hdrqfull; - /* port 0 number of receive eager buffers */ - u32 ipath_p0_rcvegrcnt; - - /* - * index of last piobuffer we used. Speeds up searching, by - * starting at this point. Doesn't matter if multiple cpu's use and - * update, last updater is only write that matters. Whenever it - * wraps, we update shadow copies. Need a copy per device when we - * get to multiple devices - */ - u32 ipath_lastpioindex; - u32 ipath_lastpioindexl; - /* max length of freezemsg */ - u32 ipath_freezelen; - /* - * consecutive times we wanted a PIO buffer but were unable to - * get one - */ - u32 ipath_consec_nopiobuf; - /* - * hint that we should update ipath_pioavailshadow before - * looking for a PIO buffer - */ - u32 ipath_upd_pio_shadow; - /* so we can rewrite it after a chip reset */ - u32 ipath_pcibar0; - /* so we can rewrite it after a chip reset */ - u32 ipath_pcibar1; - u32 ipath_x1_fix_tries; - u32 ipath_autoneg_tries; - u32 serdes_first_init_done; - - struct ipath_relock { - atomic_t ipath_relock_timer_active; - struct timer_list ipath_relock_timer; - unsigned int ipath_relock_interval; /* in jiffies */ - } ipath_relock_singleton; - - /* interrupt number */ - int ipath_irq; - /* HT/PCI Vendor ID (here for NodeInfo) */ - u16 ipath_vendorid; - /* HT/PCI Device ID (here for NodeInfo) */ - u16 ipath_deviceid; - /* offset in HT config space of slave/primary interface block */ - u8 ipath_ht_slave_off; - /* for write combining settings */ - unsigned long ipath_wc_cookie; - unsigned long ipath_wc_base; - unsigned long ipath_wc_len; - /* ref count for each pkey */ - atomic_t ipath_pkeyrefs[4]; - /* shadow copy of struct page *'s for exp tid pages */ - struct page **ipath_pageshadow; - /* shadow copy of dma handles for exp tid pages */ - dma_addr_t *ipath_physshadow; - u64 __iomem *ipath_egrtidbase; - /* lock to workaround chip bug 9437 and others */ - spinlock_t ipath_kernel_tid_lock; - spinlock_t ipath_user_tid_lock; - spinlock_t ipath_sendctrl_lock; - /* around ipath_pd and (user ports) port_cnt use (intr vs free) */ - spinlock_t ipath_uctxt_lock; - - /* - * IPATH_STATUS_*, - * this address is mapped readonly into user processes so they can - * get status cheaply, whenever they want. - */ - u64 *ipath_statusp; - /* freeze msg if hw error put chip in freeze */ - char *ipath_freezemsg; - /* pci access data structure */ - struct pci_dev *pcidev; - struct cdev *user_cdev; - struct cdev *diag_cdev; - struct device *user_dev; - struct device *diag_dev; - /* timer used to prevent stats overflow, error throttling, etc. */ - struct timer_list ipath_stats_timer; - /* timer to verify interrupts work, and fallback if possible */ - struct timer_list ipath_intrchk_timer; - void *ipath_dummy_hdrq; /* used after port close */ - dma_addr_t ipath_dummy_hdrq_phys; - - /* SendDMA related entries */ - spinlock_t ipath_sdma_lock; - unsigned long ipath_sdma_status; - unsigned long ipath_sdma_abort_jiffies; - unsigned long ipath_sdma_abort_intr_timeout; - unsigned long ipath_sdma_buf_jiffies; - struct ipath_sdma_desc *ipath_sdma_descq; - u64 ipath_sdma_descq_added; - u64 ipath_sdma_descq_removed; - int ipath_sdma_desc_nreserved; - u16 ipath_sdma_descq_cnt; - u16 ipath_sdma_descq_tail; - u16 ipath_sdma_descq_head; - u16 ipath_sdma_next_intr; - u16 ipath_sdma_reset_wait; - u8 ipath_sdma_generation; - struct tasklet_struct ipath_sdma_abort_task; - struct tasklet_struct ipath_sdma_notify_task; - struct list_head ipath_sdma_activelist; - struct list_head ipath_sdma_notifylist; - atomic_t ipath_sdma_vl15_count; - struct timer_list ipath_sdma_vl15_timer; - - dma_addr_t ipath_sdma_descq_phys; - volatile __le64 *ipath_sdma_head_dma; - dma_addr_t ipath_sdma_head_phys; - - unsigned long ipath_ureg_align; /* user register alignment */ - - struct delayed_work ipath_autoneg_work; - wait_queue_head_t ipath_autoneg_wait; - - /* HoL blocking / user app forward-progress state */ - unsigned ipath_hol_state; - unsigned ipath_hol_next; - struct timer_list ipath_hol_timer; - - /* - * Shadow copies of registers; size indicates read access size. - * Most of them are readonly, but some are write-only register, - * where we manipulate the bits in the shadow copy, and then write - * the shadow copy to infinipath. - * - * We deliberately make most of these 32 bits, since they have - * restricted range. For any that we read, we won't to generate 32 - * bit accesses, since Opteron will generate 2 separate 32 bit HT - * transactions for a 64 bit read, and we want to avoid unnecessary - * HT transactions. - */ - - /* This is the 64 bit group */ - - /* - * shadow of pioavail, check to be sure it's large enough at - * init time. - */ - unsigned long ipath_pioavailshadow[8]; - /* bitmap of send buffers available for the kernel to use with PIO. */ - unsigned long ipath_pioavailkernel[8]; - /* shadow of kr_gpio_out, for rmw ops */ - u64 ipath_gpio_out; - /* shadow the gpio mask register */ - u64 ipath_gpio_mask; - /* shadow the gpio output enable, etc... */ - u64 ipath_extctrl; - /* kr_revision shadow */ - u64 ipath_revision; - /* - * shadow of ibcctrl, for interrupt handling of link changes, - * etc. - */ - u64 ipath_ibcctrl; - /* - * last ibcstatus, to suppress "duplicate" status change messages, - * mostly from 2 to 3 - */ - u64 ipath_lastibcstat; - /* hwerrmask shadow */ - ipath_err_t ipath_hwerrmask; - ipath_err_t ipath_errormask; /* errormask shadow */ - /* interrupt config reg shadow */ - u64 ipath_intconfig; - /* kr_sendpiobufbase value */ - u64 ipath_piobufbase; - /* kr_ibcddrctrl shadow */ - u64 ipath_ibcddrctrl; - - /* these are the "32 bit" regs */ - - /* - * number of GUIDs in the flash for this interface; may need some - * rethinking for setting on other ifaces - */ - u32 ipath_nguid; - /* - * the following two are 32-bit bitmasks, but {test,clear,set}_bit - * all expect bit fields to be "unsigned long" - */ - /* shadow kr_rcvctrl */ - unsigned long ipath_rcvctrl; - /* shadow kr_sendctrl */ - unsigned long ipath_sendctrl; - /* to not count armlaunch after cancel */ - unsigned long ipath_lastcancel; - /* count cases where special trigger was needed (double write) */ - unsigned long ipath_spectriggerhit; - - /* value we put in kr_rcvhdrcnt */ - u32 ipath_rcvhdrcnt; - /* value we put in kr_rcvhdrsize */ - u32 ipath_rcvhdrsize; - /* value we put in kr_rcvhdrentsize */ - u32 ipath_rcvhdrentsize; - /* offset of last entry in rcvhdrq */ - u32 ipath_hdrqlast; - /* kr_portcnt value */ - u32 ipath_portcnt; - /* kr_pagealign value */ - u32 ipath_palign; - /* number of "2KB" PIO buffers */ - u32 ipath_piobcnt2k; - /* size in bytes of "2KB" PIO buffers */ - u32 ipath_piosize2k; - /* number of "4KB" PIO buffers */ - u32 ipath_piobcnt4k; - /* size in bytes of "4KB" PIO buffers */ - u32 ipath_piosize4k; - u32 ipath_pioreserved; /* reserved special-inkernel; */ - /* kr_rcvegrbase value */ - u32 ipath_rcvegrbase; - /* kr_rcvegrcnt value */ - u32 ipath_rcvegrcnt; - /* kr_rcvtidbase value */ - u32 ipath_rcvtidbase; - /* kr_rcvtidcnt value */ - u32 ipath_rcvtidcnt; - /* kr_sendregbase */ - u32 ipath_sregbase; - /* kr_userregbase */ - u32 ipath_uregbase; - /* kr_counterregbase */ - u32 ipath_cregbase; - /* shadow the control register contents */ - u32 ipath_control; - /* PCI revision register (HTC rev on FPGA) */ - u32 ipath_pcirev; - - /* chip address space used by 4k pio buffers */ - u32 ipath_4kalign; - /* The MTU programmed for this unit */ - u32 ipath_ibmtu; - /* - * The max size IB packet, included IB headers that we can send. - * Starts same as ipath_piosize, but is affected when ibmtu is - * changed, or by size of eager buffers - */ - u32 ipath_ibmaxlen; - /* - * ibmaxlen at init time, limited by chip and by receive buffer - * size. Not changed after init. - */ - u32 ipath_init_ibmaxlen; - /* size of each rcvegrbuffer */ - u32 ipath_rcvegrbufsize; - /* localbus width (1, 2,4,8,16,32) from config space */ - u32 ipath_lbus_width; - /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */ - u32 ipath_lbus_speed; - /* - * number of sequential ibcstatus change for polling active/quiet - * (i.e., link not coming up). - */ - u32 ipath_ibpollcnt; - /* low and high portions of MSI capability/vector */ - u32 ipath_msi_lo; - /* saved after PCIe init for restore after reset */ - u32 ipath_msi_hi; - /* MSI data (vector) saved for restore */ - u16 ipath_msi_data; - /* MLID programmed for this instance */ - u16 ipath_mlid; - /* LID programmed for this instance */ - u16 ipath_lid; - /* list of pkeys programmed; 0 if not set */ - u16 ipath_pkeys[4]; - /* - * ASCII serial number, from flash, large enough for original - * all digit strings, and longer QLogic serial number format - */ - u8 ipath_serial[16]; - /* human readable board version */ - u8 ipath_boardversion[96]; - u8 ipath_lbus_info[32]; /* human readable localbus info */ - /* chip major rev, from ipath_revision */ - u8 ipath_majrev; - /* chip minor rev, from ipath_revision */ - u8 ipath_minrev; - /* board rev, from ipath_revision */ - u8 ipath_boardrev; - /* saved for restore after reset */ - u8 ipath_pci_cacheline; - /* LID mask control */ - u8 ipath_lmc; - /* link width supported */ - u8 ipath_link_width_supported; - /* link speed supported */ - u8 ipath_link_speed_supported; - u8 ipath_link_width_enabled; - u8 ipath_link_speed_enabled; - u8 ipath_link_width_active; - u8 ipath_link_speed_active; - /* Rx Polarity inversion (compensate for ~tx on partner) */ - u8 ipath_rx_pol_inv; - - u8 ipath_r_portenable_shift; - u8 ipath_r_intravail_shift; - u8 ipath_r_tailupd_shift; - u8 ipath_r_portcfg_shift; - - /* unit # of this chip, if present */ - int ipath_unit; - - /* local link integrity counter */ - u32 ipath_lli_counter; - /* local link integrity errors */ - u32 ipath_lli_errors; - /* - * Above counts only cases where _successive_ LocalLinkIntegrity - * errors were seen in the receive headers of kern-packets. - * Below are the three (monotonically increasing) counters - * maintained via GPIO interrupts on iba6120-rev2. - */ - u32 ipath_rxfc_unsupvl_errs; - u32 ipath_overrun_thresh_errs; - u32 ipath_lli_errs; - - /* - * Not all devices managed by a driver instance are the same - * type, so these fields must be per-device. - */ - u64 ipath_i_bitsextant; - ipath_err_t ipath_e_bitsextant; - ipath_err_t ipath_hwe_bitsextant; - - /* - * Below should be computable from number of ports, - * since they are never modified. - */ - u64 ipath_i_rcvavail_mask; - u64 ipath_i_rcvurg_mask; - u16 ipath_i_rcvurg_shift; - u16 ipath_i_rcvavail_shift; - - /* - * Register bits for selecting i2c direction and values, used for - * I2C serial flash. - */ - u8 ipath_gpio_sda_num; - u8 ipath_gpio_scl_num; - u8 ipath_i2c_chain_type; - u64 ipath_gpio_sda; - u64 ipath_gpio_scl; - - /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ - spinlock_t ipath_gpio_lock; - - /* - * IB link and linktraining states and masks that vary per chip in - * some way. Set at init, to avoid each IB status change interrupt - */ - u8 ibcs_ls_shift; - u8 ibcs_lts_mask; - u32 ibcs_mask; - u32 ib_init; - u32 ib_arm; - u32 ib_active; - - u16 ipath_rhf_offset; /* offset of RHF within receive header entry */ - - /* - * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol - * reg. Changes for IBA7220 - */ - u8 ibcc_lic_mask; /* LinkInitCmd */ - u8 ibcc_lc_shift; /* LinkCmd */ - u8 ibcc_mpl_shift; /* Maxpktlen */ - - u8 delay_mult; - - /* used to override LED behavior */ - u8 ipath_led_override; /* Substituted for normal value, if non-zero */ - u16 ipath_led_override_timeoff; /* delta to next timer event */ - u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */ - u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */ - atomic_t ipath_led_override_timer_active; - /* Used to flash LEDs in override mode */ - struct timer_list ipath_led_override_timer; - - /* Support (including locks) for EEPROM logging of errors and time */ - /* control access to actual counters, timer */ - spinlock_t ipath_eep_st_lock; - /* control high-level access to EEPROM */ - struct mutex ipath_eep_lock; - /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ - uint64_t ipath_traffic_wds; - /* active time is kept in seconds, but logged in hours */ - atomic_t ipath_active_time; - /* Below are nominal shadow of EEPROM, new since last EEPROM update */ - uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT]; - uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT]; - uint16_t ipath_eep_hrs; - /* - * masks for which bits of errs, hwerrs that cause - * each of the counters to increment. - */ - struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; - - /* interrupt mitigation reload register info */ - u16 ipath_jint_idle_ticks; /* idle clock ticks */ - u16 ipath_jint_max_packets; /* max packets across all ports */ - - /* - * lock for access to SerDes, and flags to sequence preset - * versus steady-state. 7220-only at the moment. - */ - spinlock_t ipath_sdepb_lock; - u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */ -}; - -/* ipath_hol_state values (stopping/starting user proc, send flushing) */ -#define IPATH_HOL_UP 0 -#define IPATH_HOL_DOWN 1 -/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */ -#define IPATH_HOL_DOWNSTOP 0 -#define IPATH_HOL_DOWNCONT 1 - -/* bit positions for sdma_status */ -#define IPATH_SDMA_ABORTING 0 -#define IPATH_SDMA_DISARMED 1 -#define IPATH_SDMA_DISABLED 2 -#define IPATH_SDMA_LAYERBUF 3 -#define IPATH_SDMA_RUNNING 30 -#define IPATH_SDMA_SHUTDOWN 31 - -/* bit combinations that correspond to abort states */ -#define IPATH_SDMA_ABORT_NONE 0 -#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING) -#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISARMED)) -#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISABLED)) -#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED)) -#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \ - (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED)) - -#define IPATH_SDMA_BUF_NONE 0 -#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF) - -/* Private data for file operations */ -struct ipath_filedata { - struct ipath_portdata *pd; - unsigned subport; - unsigned tidcursor; - struct ipath_user_sdma_queue *pq; -}; -extern struct list_head ipath_dev_list; -extern spinlock_t ipath_devs_lock; -extern struct ipath_devdata *ipath_lookup(int unit); - -int ipath_init_chip(struct ipath_devdata *, int); -int ipath_enable_wc(struct ipath_devdata *dd); -void ipath_disable_wc(struct ipath_devdata *dd); -int ipath_count_units(int *npresentp, int *nupp, int *maxportsp); -void ipath_shutdown_device(struct ipath_devdata *); -void ipath_clear_freeze(struct ipath_devdata *); - -struct file_operations; -int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, - struct cdev **cdevp, struct device **devp); -void ipath_cdev_cleanup(struct cdev **cdevp, - struct device **devp); - -int ipath_diag_add(struct ipath_devdata *); -void ipath_diag_remove(struct ipath_devdata *); - -extern wait_queue_head_t ipath_state_wait; - -int ipath_user_add(struct ipath_devdata *dd); -void ipath_user_remove(struct ipath_devdata *dd); - -struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); - -extern int ipath_diag_inuse; - -irqreturn_t ipath_intr(int irq, void *devid); -int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, - ipath_err_t err); -#if __IPATH_INFO || __IPATH_DBG -extern const char *ipath_ibcstatus_str[]; -#endif - -/* clean up any per-chip chip-specific stuff */ -void ipath_chip_cleanup(struct ipath_devdata *); -/* clean up any chip type-specific stuff */ -void ipath_chip_done(void); - -void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, - unsigned cnt); -void ipath_cancel_sends(struct ipath_devdata *, int); - -int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); -void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); - -int ipath_parse_ushort(const char *str, unsigned short *valp); - -void ipath_kreceive(struct ipath_portdata *); -int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); -int ipath_reset_device(int); -void ipath_get_faststats(unsigned long); -int ipath_wait_linkstate(struct ipath_devdata *, u32, int); -int ipath_set_linkstate(struct ipath_devdata *, u8); -int ipath_set_mtu(struct ipath_devdata *, u16); -int ipath_set_lid(struct ipath_devdata *, u32, u8); -int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); -void ipath_enable_armlaunch(struct ipath_devdata *); -void ipath_disable_armlaunch(struct ipath_devdata *); -void ipath_hol_down(struct ipath_devdata *); -void ipath_hol_up(struct ipath_devdata *); -void ipath_hol_event(unsigned long); -void ipath_toggle_rclkrls(struct ipath_devdata *); -void ipath_sd7220_clr_ibpar(struct ipath_devdata *); -void ipath_set_relock_poll(struct ipath_devdata *, int); -void ipath_shutdown_relock_poll(struct ipath_devdata *); - -/* for use in system calls, where we want to know device type, etc. */ -#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd -#define subport_fp(fp) \ - ((struct ipath_filedata *)(fp)->private_data)->subport -#define tidcursor_fp(fp) \ - ((struct ipath_filedata *)(fp)->private_data)->tidcursor -#define user_sdma_queue_fp(fp) \ - ((struct ipath_filedata *)(fp)->private_data)->pq - -/* - * values for ipath_flags - */ - /* chip can report link latency (IB 1.2) */ -#define IPATH_HAS_LINK_LATENCY 0x1 - /* The chip is up and initted */ -#define IPATH_INITTED 0x2 - /* set if any user code has set kr_rcvhdrsize */ -#define IPATH_RCVHDRSZ_SET 0x4 - /* The chip is present and valid for accesses */ -#define IPATH_PRESENT 0x8 - /* HT link0 is only 8 bits wide, ignore upper byte crc - * errors, etc. */ -#define IPATH_8BIT_IN_HT0 0x10 - /* HT link1 is only 8 bits wide, ignore upper byte crc - * errors, etc. */ -#define IPATH_8BIT_IN_HT1 0x20 - /* The link is down */ -#define IPATH_LINKDOWN 0x40 - /* The link level is up (0x11) */ -#define IPATH_LINKINIT 0x80 - /* The link is in the armed (0x21) state */ -#define IPATH_LINKARMED 0x100 - /* The link is in the active (0x31) state */ -#define IPATH_LINKACTIVE 0x200 - /* link current state is unknown */ -#define IPATH_LINKUNK 0x400 - /* Write combining flush needed for PIO */ -#define IPATH_PIO_FLUSH_WC 0x1000 - /* DMA Receive tail pointer */ -#define IPATH_NODMA_RTAIL 0x2000 - /* no IB cable, or no device on IB cable */ -#define IPATH_NOCABLE 0x4000 - /* Supports port zero per packet receive interrupts via - * GPIO */ -#define IPATH_GPIO_INTR 0x8000 - /* uses the coded 4byte TID, not 8 byte */ -#define IPATH_4BYTE_TID 0x10000 - /* packet/word counters are 32 bit, else those 4 counters - * are 64bit */ -#define IPATH_32BITCOUNTERS 0x20000 - /* Interrupt register is 64 bits */ -#define IPATH_INTREG_64 0x40000 - /* can miss port0 rx interrupts */ -#define IPATH_DISABLED 0x80000 /* administratively disabled */ - /* Use GPIO interrupts for new counters */ -#define IPATH_GPIO_ERRINTRS 0x100000 -#define IPATH_SWAP_PIOBUFS 0x200000 - /* Supports Send DMA */ -#define IPATH_HAS_SEND_DMA 0x400000 - /* Supports Send Count (not just word count) in PBC */ -#define IPATH_HAS_PBC_CNT 0x800000 - /* Suppress heartbeat, even if turning off loopback */ -#define IPATH_NO_HRTBT 0x1000000 -#define IPATH_HAS_THRESH_UPDATE 0x4000000 -#define IPATH_HAS_MULT_IB_SPEED 0x8000000 -#define IPATH_IB_AUTONEG_INPROG 0x10000000 -#define IPATH_IB_AUTONEG_FAILED 0x20000000 - /* Linkdown-disable intentionally, Do not attempt to bring up */ -#define IPATH_IB_LINK_DISABLED 0x40000000 -#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */ - -/* Bits in GPIO for the added interrupts */ -#define IPATH_GPIO_PORT0_BIT 2 -#define IPATH_GPIO_RXUVL_BIT 3 -#define IPATH_GPIO_OVRUN_BIT 4 -#define IPATH_GPIO_LLI_BIT 5 -#define IPATH_GPIO_ERRINTR_MASK 0x38 - -/* portdata flag bit offsets */ - /* waiting for a packet to arrive */ -#define IPATH_PORT_WAITING_RCV 2 - /* master has not finished initializing */ -#define IPATH_PORT_MASTER_UNINIT 4 - /* waiting for an urgent packet to arrive */ -#define IPATH_PORT_WAITING_URG 5 - -/* free up any allocated data at closes */ -void ipath_free_data(struct ipath_portdata *dd); -u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *); -void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, - unsigned len, int avail); -void ipath_init_iba6110_funcs(struct ipath_devdata *); -void ipath_get_eeprom_info(struct ipath_devdata *); -int ipath_update_eeprom_log(struct ipath_devdata *dd); -void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); -u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *); -void ipath_force_pio_avail_update(struct ipath_devdata *); -void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); - -/* - * Set LED override, only the two LSBs have "public" meaning, but - * any non-zero value substitutes them for the Link and LinkTrain - * LED states. - */ -#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ -#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */ -void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); - -/* send dma routines */ -int setup_sdma(struct ipath_devdata *); -void teardown_sdma(struct ipath_devdata *); -void ipath_restart_sdma(struct ipath_devdata *); -void ipath_sdma_intr(struct ipath_devdata *); -int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *, - u32, struct ipath_verbs_txreq *); -/* ipath_sdma_lock should be locked before calling this. */ -int ipath_sdma_make_progress(struct ipath_devdata *dd); - -/* must be called under ipath_sdma_lock */ -static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd) -{ - return dd->ipath_sdma_descq_cnt - - (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) - - 1 - dd->ipath_sdma_desc_nreserved; -} - -static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt) -{ - dd->ipath_sdma_desc_nreserved += cnt; -} - -static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt) -{ - dd->ipath_sdma_desc_nreserved -= cnt; -} - -/* - * number of words used for protocol header if not set by ipath_userinit(); - */ -#define IPATH_DFLT_RCVHDRSIZE 9 - -int ipath_get_user_pages(unsigned long, size_t, struct page **); -void ipath_release_user_pages(struct page **, size_t); -void ipath_release_user_pages_on_close(struct page **, size_t); -int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); -int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); -int ipath_tempsense_read(struct ipath_devdata *, u8 regnum); -int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data); - -/* these are used for the registers that vary with port */ -void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, - unsigned, u64); - -/* - * We could have a single register get/put routine, that takes a group type, - * but this is somewhat clearer and cleaner. It also gives us some error - * checking. 64 bit register reads should always work, but are inefficient - * on opteron (the northbridge always generates 2 separate HT 32 bit reads), - * so we use kreg32 wherever possible. User register and counter register - * reads are always 32 bit reads, so only one form of those routines. - */ - -/* - * At the moment, none of the s-registers are writable, so no - * ipath_write_sreg(). - */ - -/** - * ipath_read_ureg32 - read 32-bit virtualized per-port register - * @dd: device - * @regno: register number - * @port: port number - * - * Return the contents of a register that is virtualized to be per port. - * Returns -1 on errors (not distinguishable from valid contents at - * runtime; we may add a separate error variable at some point). - */ -static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, - ipath_ureg regno, int port) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return 0; - - return readl(regno + (u64 __iomem *) - (dd->ipath_uregbase + - (char __iomem *)dd->ipath_kregbase + - dd->ipath_ureg_align * port)); -} - -/** - * ipath_write_ureg - write 32-bit virtualized per-port register - * @dd: device - * @regno: register number - * @value: value - * @port: port - * - * Write the contents of a register that is virtualized to be per port. - */ -static inline void ipath_write_ureg(const struct ipath_devdata *dd, - ipath_ureg regno, u64 value, int port) -{ - u64 __iomem *ubase = (u64 __iomem *) - (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + - dd->ipath_ureg_align * port); - if (dd->ipath_kregbase) - writeq(value, &ubase[regno]); -} - -static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd, - ipath_kreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return -1; - return readl((u32 __iomem *) & dd->ipath_kregbase[regno]); -} - -static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd, - ipath_kreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return -1; - - return readq(&dd->ipath_kregbase[regno]); -} - -static inline void ipath_write_kreg(const struct ipath_devdata *dd, - ipath_kreg regno, u64 value) -{ - if (dd->ipath_kregbase) - writeq(value, &dd->ipath_kregbase[regno]); -} - -static inline u64 ipath_read_creg(const struct ipath_devdata *dd, - ipath_sreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return 0; - - return readq(regno + (u64 __iomem *) - (dd->ipath_cregbase + - (char __iomem *)dd->ipath_kregbase)); -} - -static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, - ipath_sreg regno) -{ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) - return 0; - return readl(regno + (u64 __iomem *) - (dd->ipath_cregbase + - (char __iomem *)dd->ipath_kregbase)); -} - -static inline void ipath_write_creg(const struct ipath_devdata *dd, - ipath_creg regno, u64 value) -{ - if (dd->ipath_kregbase) - writeq(value, regno + (u64 __iomem *) - (dd->ipath_cregbase + - (char __iomem *)dd->ipath_kregbase)); -} - -static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd) -{ - *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL; -} - -static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd) -{ - return (u32) le64_to_cpu(*((volatile __le64 *) - pd->port_rcvhdrtail_kvaddr)); -} - -static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd) -{ - const struct ipath_devdata *dd = pd->port_dd; - u32 hdrqtail; - - if (dd->ipath_flags & IPATH_NODMA_RTAIL) { - __le32 *rhf_addr; - u32 seq; - - rhf_addr = (__le32 *) pd->port_rcvhdrq + - pd->port_head + dd->ipath_rhf_offset; - seq = ipath_hdrget_seq(rhf_addr); - hdrqtail = pd->port_head; - if (seq == pd->port_seq_cnt) - hdrqtail++; - } else - hdrqtail = ipath_get_rcvhdrtail(pd); - - return hdrqtail; -} - -static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r) -{ - return (dd->ipath_flags & IPATH_INTREG_64) ? - ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r); -} - -/* - * from contents of IBCStatus (or a saved copy), return linkstate - * Report ACTIVE_DEFER as ACTIVE, because we treat them the same - * everywhere, anyway (and should be, for almost all purposes). - */ -static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs) -{ - u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) & - INFINIPATH_IBCS_LINKSTATE_MASK; - if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER) - state = INFINIPATH_IBCS_L_STATE_ACTIVE; - return state; -} - -/* from contents of IBCStatus (or a saved copy), return linktrainingstate */ -static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs) -{ - return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - dd->ibcs_lts_mask; -} - -/* - * from contents of IBCStatus (or a saved copy), return logical link state - * combination of link state and linktraining state (down, active, init, - * arm, etc. - */ -static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs) -{ - u32 ibs; - ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - dd->ibcs_lts_mask; - ibs |= (u32)(ibcs & - (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift)); - return ibs; -} - -/* - * sysfs interface. - */ - -struct device_driver; - -extern const char ib_ipath_version[]; - -extern const struct attribute_group *ipath_driver_attr_groups[]; - -int ipath_device_create_group(struct device *, struct ipath_devdata *); -void ipath_device_remove_group(struct device *, struct ipath_devdata *); -int ipath_expose_reset(struct device *); - -int ipath_init_ipathfs(void); -void ipath_exit_ipathfs(void); -int ipathfs_add_device(struct ipath_devdata *); -int ipathfs_remove_device(struct ipath_devdata *); - -/* - * dma_addr wrappers - all 0's invalid for hw - */ -dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long, - size_t, int); -dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); -const char *ipath_get_unit_name(int unit); - -/* - * Flush write combining store buffers (if present) and perform a write - * barrier. - */ -#if defined(CONFIG_X86_64) -#define ipath_flush_wc() asm volatile("sfence" ::: "memory") -#else -#define ipath_flush_wc() wmb() -#endif - -extern unsigned ipath_debug; /* debugging bit mask */ -extern unsigned ipath_linkrecovery; -extern unsigned ipath_mtu4096; -extern struct mutex ipath_mutex; - -#define IPATH_DRV_NAME "ib_ipath" -#define IPATH_MAJOR 233 -#define IPATH_USER_MINOR_BASE 0 -#define IPATH_DIAGPKT_MINOR 127 -#define IPATH_DIAG_MINOR_BASE 129 -#define IPATH_NMINORS 255 - -#define ipath_dev_err(dd,fmt,...) \ - do { \ - const struct ipath_devdata *__dd = (dd); \ - if (__dd->pcidev) \ - dev_err(&__dd->pcidev->dev, "%s: " fmt, \ - ipath_get_unit_name(__dd->ipath_unit), \ - ##__VA_ARGS__); \ - else \ - printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \ - ipath_get_unit_name(__dd->ipath_unit), \ - ##__VA_ARGS__); \ - } while (0) - -#if _IPATH_DEBUGGING - -# define __IPATH_DBG_WHICH(which,fmt,...) \ - do { \ - if (unlikely(ipath_debug & (which))) \ - printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \ - __func__,##__VA_ARGS__); \ - } while(0) - -# define ipath_dbg(fmt,...) \ - __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__) -# define ipath_cdbg(which,fmt,...) \ - __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__) - -#else /* ! _IPATH_DEBUGGING */ - -# define ipath_dbg(fmt,...) -# define ipath_cdbg(which,fmt,...) - -#endif /* _IPATH_DEBUGGING */ - -/* - * this is used for formatting hw error messages... - */ -struct ipath_hwerror_msgs { - u64 mask; - const char *msg; -}; - -#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b } - -/* in ipath_intr.c... */ -void ipath_format_hwerrors(u64 hwerrs, - const struct ipath_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, - char *msg, size_t lmsg); - -#endif /* _IPATH_KERNEL_H */ diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_keys.c b/kernel/drivers/infiniband/hw/ipath/ipath_keys.c deleted file mode 100644 index c0e933fec..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_keys.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <asm/io.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/** - * ipath_alloc_lkey - allocate an lkey - * @rkt: lkey table in which to allocate the lkey - * @mr: memory region that this lkey protects - * - * Returns 1 if successful, otherwise returns 0. - */ - -int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) -{ - unsigned long flags; - u32 r; - u32 n; - int ret; - - spin_lock_irqsave(&rkt->lock, flags); - - /* Find the next available LKEY */ - r = n = rkt->next; - for (;;) { - if (rkt->table[r] == NULL) - break; - r = (r + 1) & (rkt->max - 1); - if (r == n) { - spin_unlock_irqrestore(&rkt->lock, flags); - ipath_dbg("LKEY table full\n"); - ret = 0; - goto bail; - } - } - rkt->next = (r + 1) & (rkt->max - 1); - /* - * Make sure lkey is never zero which is reserved to indicate an - * unrestricted LKEY. - */ - rkt->gen++; - mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) | - ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen) - << 8); - if (mr->lkey == 0) { - mr->lkey |= 1 << 8; - rkt->gen++; - } - rkt->table[r] = mr; - spin_unlock_irqrestore(&rkt->lock, flags); - - ret = 1; - -bail: - return ret; -} - -/** - * ipath_free_lkey - free an lkey - * @rkt: table from which to free the lkey - * @lkey: lkey id to free - */ -void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) -{ - unsigned long flags; - u32 r; - - if (lkey == 0) - return; - r = lkey >> (32 - ib_ipath_lkey_table_size); - spin_lock_irqsave(&rkt->lock, flags); - rkt->table[r] = NULL; - spin_unlock_irqrestore(&rkt->lock, flags); -} - -/** - * ipath_lkey_ok - check IB SGE for validity and initialize - * @rkt: table containing lkey to check SGE against - * @isge: outgoing internal SGE - * @sge: SGE to check - * @acc: access flags - * - * Return 1 if valid and successful, otherwise returns 0. - * - * Check the IB SGE for validity and initialize our internal version - * of it. - */ -int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, - struct ib_sge *sge, int acc) -{ - struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct ipath_mregion *mr; - unsigned n, m; - size_t off; - int ret; - - /* - * We use LKEY == zero for kernel virtual addresses - * (see ipath_get_dma_mr and ipath_dma.c). - */ - if (sge->lkey == 0) { - /* always a kernel port, no locking needed */ - struct ipath_pd *pd = to_ipd(qp->ibqp.pd); - - if (pd->user) { - ret = 0; - goto bail; - } - isge->mr = NULL; - isge->vaddr = (void *) sge->addr; - isge->length = sge->length; - isge->sge_length = sge->length; - ret = 1; - goto bail; - } - mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey || - qp->ibqp.pd != mr->pd)) { - ret = 0; - goto bail; - } - - off = sge->addr - mr->user_base; - if (unlikely(sge->addr < mr->user_base || - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) { - ret = 0; - goto bail; - } - - off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= IPATH_SEGSZ) { - m++; - n = 0; - } - } - isge->mr = mr; - isge->vaddr = mr->map[m]->segs[n].vaddr + off; - isge->length = mr->map[m]->segs[n].length - off; - isge->sge_length = sge->length; - isge->m = m; - isge->n = n; - - ret = 1; - -bail: - return ret; -} - -/** - * ipath_rkey_ok - check the IB virtual address, length, and RKEY - * @dev: infiniband device - * @ss: SGE state - * @len: length of data - * @vaddr: virtual address to place data - * @rkey: rkey to check - * @acc: access flags - * - * Return 1 if successful, otherwise 0. - */ -int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_lkey_table *rkt = &dev->lk_table; - struct ipath_sge *sge = &ss->sge; - struct ipath_mregion *mr; - unsigned n, m; - size_t off; - int ret; - - /* - * We use RKEY == zero for kernel virtual addresses - * (see ipath_get_dma_mr and ipath_dma.c). - */ - if (rkey == 0) { - /* always a kernel port, no locking needed */ - struct ipath_pd *pd = to_ipd(qp->ibqp.pd); - - if (pd->user) { - ret = 0; - goto bail; - } - sge->mr = NULL; - sge->vaddr = (void *) vaddr; - sge->length = len; - sge->sge_length = len; - ss->sg_list = NULL; - ss->num_sge = 1; - ret = 1; - goto bail; - } - - mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey || - qp->ibqp.pd != mr->pd)) { - ret = 0; - goto bail; - } - - off = vaddr - mr->iova; - if (unlikely(vaddr < mr->iova || off + len > mr->length || - (mr->access_flags & acc) == 0)) { - ret = 0; - goto bail; - } - - off += mr->offset; - m = 0; - n = 0; - while (off >= mr->map[m]->segs[n].length) { - off -= mr->map[m]->segs[n].length; - n++; - if (n >= IPATH_SEGSZ) { - m++; - n = 0; - } - } - sge->mr = mr; - sge->vaddr = mr->map[m]->segs[n].vaddr + off; - sge->length = mr->map[m]->segs[n].length - off; - sge->sge_length = len; - sge->m = m; - sge->n = n; - ss->sg_list = NULL; - ss->num_sge = 1; - - ret = 1; - -bail: - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_mad.c b/kernel/drivers/infiniband/hw/ipath/ipath_mad.c deleted file mode 100644 index e890e5ba0..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_mad.c +++ /dev/null @@ -1,1513 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <rdma/ib_smi.h> -#include <rdma/ib_pma.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) -#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) -#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) - -static int reply(struct ib_smp *smp) -{ - /* - * The verbs framework will handle the directed/LID route - * packet changes. - */ - smp->method = IB_MGMT_METHOD_GET_RESP; - if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - smp->status |= IB_SMP_DIRECTION; - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - -static int recv_subn_get_nodedescription(struct ib_smp *smp, - struct ib_device *ibdev) -{ - if (smp->attr_mod) - smp->status |= IB_SMP_INVALID_FIELD; - - memcpy(smp->data, ibdev->node_desc, sizeof(smp->data)); - - return reply(smp); -} - -struct nodeinfo { - u8 base_version; - u8 class_version; - u8 node_type; - u8 num_ports; - __be64 sys_guid; - __be64 node_guid; - __be64 port_guid; - __be16 partition_cap; - __be16 device_id; - __be32 revision; - u8 local_port_num; - u8 vendor_id[3]; -} __attribute__ ((packed)); - -static int recv_subn_get_nodeinfo(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - struct nodeinfo *nip = (struct nodeinfo *)&smp->data; - struct ipath_devdata *dd = to_idev(ibdev)->dd; - u32 vendor, majrev, minrev; - - /* GUID 0 is illegal */ - if (smp->attr_mod || (dd->ipath_guid == 0)) - smp->status |= IB_SMP_INVALID_FIELD; - - nip->base_version = 1; - nip->class_version = 1; - nip->node_type = 1; /* channel adapter */ - /* - * XXX The num_ports value will need a layer function to get - * the value if we ever have more than one IB port on a chip. - * We will also need to get the GUID for the port. - */ - nip->num_ports = ibdev->phys_port_cnt; - /* This is already in network order */ - nip->sys_guid = to_idev(ibdev)->sys_image_guid; - nip->node_guid = dd->ipath_guid; - nip->port_guid = dd->ipath_guid; - nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); - nip->device_id = cpu_to_be16(dd->ipath_deviceid); - majrev = dd->ipath_majrev; - minrev = dd->ipath_minrev; - nip->revision = cpu_to_be32((majrev << 16) | minrev); - nip->local_port_num = port; - vendor = dd->ipath_vendorid; - nip->vendor_id[0] = IPATH_SRC_OUI_1; - nip->vendor_id[1] = IPATH_SRC_OUI_2; - nip->vendor_id[2] = IPATH_SRC_OUI_3; - - return reply(smp); -} - -static int recv_subn_get_guidinfo(struct ib_smp *smp, - struct ib_device *ibdev) -{ - u32 startgx = 8 * be32_to_cpu(smp->attr_mod); - __be64 *p = (__be64 *) smp->data; - - /* 32 blocks of 8 64-bit GUIDs per block */ - - memset(smp->data, 0, sizeof(smp->data)); - - /* - * We only support one GUID for now. If this changes, the - * portinfo.guid_cap field needs to be updated too. - */ - if (startgx == 0) { - __be64 g = to_idev(ibdev)->dd->ipath_guid; - if (g == 0) - /* GUID 0 is illegal */ - smp->status |= IB_SMP_INVALID_FIELD; - else - /* The first is a copy of the read-only HW GUID. */ - *p = g; - } else - smp->status |= IB_SMP_INVALID_FIELD; - - return reply(smp); -} - -static void set_link_width_enabled(struct ipath_devdata *dd, u32 w) -{ - (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w); -} - -static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s) -{ - (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s); -} - -static int get_overrunthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; -} - -/** - * set_overrunthreshold - set the overrun threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -static int get_phyerrthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; -} - -/** - * set_phyerrthreshold - set the physical error threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -/** - * get_linkdowndefaultstate - get the default linkdown state - * @dd: the infinipath device - * - * Returns zero if the default is POLL, 1 if the default is SLEEP. - */ -static int get_linkdowndefaultstate(struct ipath_devdata *dd) -{ - return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); -} - -static int recv_subn_get_portinfo(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - struct ipath_ibdev *dev; - struct ipath_devdata *dd; - struct ib_port_info *pip = (struct ib_port_info *)smp->data; - u16 lid; - u8 ibcstat; - u8 mtu; - int ret; - - if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) { - smp->status |= IB_SMP_INVALID_FIELD; - ret = reply(smp); - goto bail; - } - - dev = to_idev(ibdev); - dd = dev->dd; - - /* Clear all fields. Only set the non-zero fields. */ - memset(smp->data, 0, sizeof(smp->data)); - - /* Only return the mkey if the protection field allows it. */ - if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey || - dev->mkeyprot == 0) - pip->mkey = dev->mkey; - pip->gid_prefix = dev->gid_prefix; - lid = dd->ipath_lid; - pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; - pip->sm_lid = cpu_to_be16(dev->sm_lid); - pip->cap_mask = cpu_to_be32(dev->port_cap_flags); - /* pip->diag_code; */ - pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period); - pip->local_port_num = port; - pip->link_width_enabled = dd->ipath_link_width_enabled; - pip->link_width_supported = dd->ipath_link_width_supported; - pip->link_width_active = dd->ipath_link_width_active; - pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4; - ibcstat = dd->ipath_lastibcstat; - /* map LinkState to IB portinfo values. */ - pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1; - - pip->portphysstate_linkdown = - (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) | - (get_linkdowndefaultstate(dd) ? 1 : 2); - pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc; - pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) | - dd->ipath_link_speed_enabled; - switch (dd->ipath_ibmtu) { - case 4096: - mtu = IB_MTU_4096; - break; - case 2048: - mtu = IB_MTU_2048; - break; - case 1024: - mtu = IB_MTU_1024; - break; - case 512: - mtu = IB_MTU_512; - break; - case 256: - mtu = IB_MTU_256; - break; - default: /* oops, something is wrong */ - mtu = IB_MTU_2048; - break; - } - pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl; - pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */ - pip->vl_high_limit = dev->vl_high_limit; - /* pip->vl_arb_high_cap; // only one VL */ - /* pip->vl_arb_low_cap; // only one VL */ - /* InitTypeReply = 0 */ - /* our mtu cap depends on whether 4K MTU enabled or not */ - pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048; - /* HCAs ignore VLStallCount and HOQLife */ - /* pip->vlstallcnt_hoqlife; */ - pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ - pip->mkey_violations = cpu_to_be16(dev->mkey_violations); - /* P_KeyViolations are counted by hardware. */ - pip->pkey_violations = - cpu_to_be16((ipath_get_cr_errpkey(dd) - - dev->z_pkey_violations) & 0xFFFF); - pip->qkey_violations = cpu_to_be16(dev->qkey_violations); - /* Only the hardware GUID is supported for now */ - pip->guid_cap = 1; - pip->clientrereg_resv_subnetto = dev->subnet_timeout; - /* 32.768 usec. response time (guessing) */ - pip->resv_resptimevalue = 3; - pip->localphyerrors_overrunerrors = - (get_phyerrthreshold(dd) << 4) | - get_overrunthreshold(dd); - /* pip->max_credit_hint; */ - if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { - u32 v; - - v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY); - pip->link_roundtrip_latency[0] = v >> 16; - pip->link_roundtrip_latency[1] = v >> 8; - pip->link_roundtrip_latency[2] = v; - } - - ret = reply(smp); - -bail: - return ret; -} - -/** - * get_pkeys - return the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the pkey table is placed here - */ -static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - /* always a kernel port, no locking needed */ - struct ipath_portdata *pd = dd->ipath_pd[0]; - - memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); - - return 0; -} - -static int recv_subn_get_pkeytable(struct ib_smp *smp, - struct ib_device *ibdev) -{ - u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); - u16 *p = (u16 *) smp->data; - __be16 *q = (__be16 *) smp->data; - - /* 64 blocks of 32 16-bit P_Key entries */ - - memset(smp->data, 0, sizeof(smp->data)); - if (startpx == 0) { - struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_get_npkeys(dev->dd); - - get_pkeys(dev->dd, p); - - for (i = 0; i < n; i++) - q[i] = cpu_to_be16(p[i]); - } else - smp->status |= IB_SMP_INVALID_FIELD; - - return reply(smp); -} - -static int recv_subn_set_guidinfo(struct ib_smp *smp, - struct ib_device *ibdev) -{ - /* The only GUID we support is the first read-only entry. */ - return recv_subn_get_guidinfo(smp, ibdev); -} - -/** - * set_linkdowndefaultstate - set the default linkdown state - * @dd: the infinipath device - * @sleep: the new state - * - * Note that this will only take effect when the link state changes. - */ -static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) -{ - if (sleep) - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - else - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - return 0; -} - -/** - * recv_subn_set_portinfo - set port information - * @smp: the incoming SM packet - * @ibdev: the infiniband device - * @port: the port on the device - * - * Set Portinfo (see ch. 14.2.5.6). - */ -static int recv_subn_set_portinfo(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - struct ib_port_info *pip = (struct ib_port_info *)smp->data; - struct ib_event event; - struct ipath_ibdev *dev; - struct ipath_devdata *dd; - char clientrereg = 0; - u16 lid, smlid; - u8 lwe; - u8 lse; - u8 state; - u16 lstate; - u32 mtu; - int ret, ore; - - if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) - goto err; - - dev = to_idev(ibdev); - dd = dev->dd; - event.device = ibdev; - event.element.port_num = port; - - dev->mkey = pip->mkey; - dev->gid_prefix = pip->gid_prefix; - dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); - - lid = be16_to_cpu(pip->lid); - if (dd->ipath_lid != lid || - dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) { - /* Must be a valid unicast LID address. */ - if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) - goto err; - ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7); - event.event = IB_EVENT_LID_CHANGE; - ib_dispatch_event(&event); - } - - smlid = be16_to_cpu(pip->sm_lid); - if (smlid != dev->sm_lid) { - /* Must be a valid unicast LID address. */ - if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE) - goto err; - dev->sm_lid = smlid; - event.event = IB_EVENT_SM_CHANGE; - ib_dispatch_event(&event); - } - - /* Allow 1x or 4x to be set (see 14.2.6.6). */ - lwe = pip->link_width_enabled; - if (lwe) { - if (lwe == 0xFF) - lwe = dd->ipath_link_width_supported; - else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported)) - goto err; - set_link_width_enabled(dd, lwe); - } - - /* Allow 2.5 or 5.0 Gbs. */ - lse = pip->linkspeedactive_enabled & 0xF; - if (lse) { - if (lse == 15) - lse = dd->ipath_link_speed_supported; - else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported)) - goto err; - set_link_speed_enabled(dd, lse); - } - - /* Set link down default state. */ - switch (pip->portphysstate_linkdown & 0xF) { - case 0: /* NOP */ - break; - case 1: /* SLEEP */ - if (set_linkdowndefaultstate(dd, 1)) - goto err; - break; - case 2: /* POLL */ - if (set_linkdowndefaultstate(dd, 0)) - goto err; - break; - default: - goto err; - } - - dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6; - dev->vl_high_limit = pip->vl_high_limit; - - switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) { - case IB_MTU_256: - mtu = 256; - break; - case IB_MTU_512: - mtu = 512; - break; - case IB_MTU_1024: - mtu = 1024; - break; - case IB_MTU_2048: - mtu = 2048; - break; - case IB_MTU_4096: - if (!ipath_mtu4096) - goto err; - mtu = 4096; - break; - default: - /* XXX We have already partially updated our state! */ - goto err; - } - ipath_set_mtu(dd, mtu); - - dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; - - /* We only support VL0 */ - if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1) - goto err; - - if (pip->mkey_violations == 0) - dev->mkey_violations = 0; - - /* - * Hardware counter can't be reset so snapshot and subtract - * later. - */ - if (pip->pkey_violations == 0) - dev->z_pkey_violations = ipath_get_cr_errpkey(dd); - - if (pip->qkey_violations == 0) - dev->qkey_violations = 0; - - ore = pip->localphyerrors_overrunerrors; - if (set_phyerrthreshold(dd, (ore >> 4) & 0xF)) - goto err; - - if (set_overrunthreshold(dd, (ore & 0xF))) - goto err; - - dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; - - if (pip->clientrereg_resv_subnetto & 0x80) { - clientrereg = 1; - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } - - /* - * Do the port state change now that the other link parameters - * have been set. - * Changing the port physical state only makes sense if the link - * is down or is being set to down. - */ - state = pip->linkspeed_portstate & 0xF; - lstate = (pip->portphysstate_linkdown >> 4) & 0xF; - if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) - goto err; - - /* - * Only state changes of DOWN, ARM, and ACTIVE are valid - * and must be in the correct state to take effect (see 7.2.6). - */ - switch (state) { - case IB_PORT_NOP: - if (lstate == 0) - break; - /* FALLTHROUGH */ - case IB_PORT_DOWN: - if (lstate == 0) - lstate = IPATH_IB_LINKDOWN_ONLY; - else if (lstate == 1) - lstate = IPATH_IB_LINKDOWN_SLEEP; - else if (lstate == 2) - lstate = IPATH_IB_LINKDOWN; - else if (lstate == 3) - lstate = IPATH_IB_LINKDOWN_DISABLE; - else - goto err; - ipath_set_linkstate(dd, lstate); - if (lstate == IPATH_IB_LINKDOWN_DISABLE) { - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - goto done; - } - ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED | - IPATH_LINKACTIVE, 1000); - break; - case IB_PORT_ARMED: - ipath_set_linkstate(dd, IPATH_IB_LINKARM); - break; - case IB_PORT_ACTIVE: - ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); - break; - default: - /* XXX We have already partially updated our state! */ - goto err; - } - - ret = recv_subn_get_portinfo(smp, ibdev, port); - - if (clientrereg) - pip->clientrereg_resv_subnetto |= 0x80; - - goto done; - -err: - smp->status |= IB_SMP_INVALID_FIELD; - ret = recv_subn_get_portinfo(smp, ibdev, port); - -done: - return ret; -} - -/** - * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the infinipath device - * @key: the PKEY index - * - * Return true if this was the last reference and the hardware table entry - * needs to be changed. - */ -static int rm_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (dd->ipath_pkeys[i] != key) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { - dd->ipath_pkeys[i] = 0; - ret = 1; - goto bail; - } - break; - } - - ret = 0; - -bail: - return ret; -} - -/** - * add_pkey - add the given PKEY to the hardware table - * @dd: the infinipath device - * @key: the PKEY - * - * Return an error code if unable to add the entry, zero if no change, - * or 1 if the hardware PKEY register needs to be updated. - */ -static int add_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - u16 lkey = key & 0x7FFF; - int any = 0; - int ret; - - if (lkey == 0x7FFF) { - ret = 0; - goto bail; - } - - /* Look for an empty slot or a matching PKEY. */ - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - /* If it matches exactly, try to increment the ref count */ - if (dd->ipath_pkeys[i] == key) { - if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { - ret = 0; - goto bail; - } - /* Lost the race. Look for an empty slot below. */ - atomic_dec(&dd->ipath_pkeyrefs[i]); - any++; - } - /* - * It makes no sense to have both the limited and unlimited - * PKEY set at the same time since the unlimited one will - * disable the limited one. - */ - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ret = -EBUSY; - goto bail; - } - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - dd->ipath_pkeys[i] = key; - ret = 1; - goto bail; - } - } - ret = -EBUSY; - -bail: - return ret; -} - -/** - * set_pkeys - set the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the PKEY table - */ -static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port) -{ - struct ipath_portdata *pd; - int i; - int changed = 0; - - /* always a kernel port, no locking needed */ - pd = dd->ipath_pd[0]; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - u16 key = pkeys[i]; - u16 okey = pd->port_pkeys[i]; - - if (key == okey) - continue; - /* - * The value of this PKEY table entry is changing. - * Remove the old entry in the hardware's array of PKEYs. - */ - if (okey & 0x7FFF) - changed |= rm_pkey(dd, okey); - if (key & 0x7FFF) { - int ret = add_pkey(dd, key); - - if (ret < 0) - key = 0; - else - changed |= ret; - } - pd->port_pkeys[i] = key; - } - if (changed) { - u64 pkey; - struct ib_event event; - - pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - - event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev->ibdev; - event.element.port_num = port; - ib_dispatch_event(&event); - } - return 0; -} - -static int recv_subn_set_pkeytable(struct ib_smp *smp, - struct ib_device *ibdev, u8 port) -{ - u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); - __be16 *p = (__be16 *) smp->data; - u16 *q = (u16 *) smp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_get_npkeys(dev->dd); - - for (i = 0; i < n; i++) - q[i] = be16_to_cpu(p[i]); - - if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0) - smp->status |= IB_SMP_INVALID_FIELD; - - return recv_subn_get_pkeytable(smp, ibdev); -} - -static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp) -{ - struct ib_class_port_info *p = - (struct ib_class_port_info *)pmp->data; - - memset(pmp->data, 0, sizeof(pmp->data)); - - if (pmp->mad_hdr.attr_mod != 0) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - - /* Indicate AllPortSelect is valid (only one port anyway) */ - p->capability_mask = cpu_to_be16(1 << 8); - p->base_version = 1; - p->class_version = 1; - /* - * Expected response time is 4.096 usec. * 2^18 == 1.073741824 - * sec. - */ - p->resp_time_value = 18; - - return reply((struct ib_smp *) pmp); -} - -/* - * The PortSamplesControl.CounterMasks field is an array of 3 bit fields - * which specify the N'th counter's capabilities. See ch. 16.1.3.2. - * We support 5 counters which only count the mandatory quantities. - */ -#define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) -#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \ - COUNTER_MASK(1, 1) | \ - COUNTER_MASK(1, 2) | \ - COUNTER_MASK(1, 3) | \ - COUNTER_MASK(1, 4)) - -static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portsamplescontrol *p = - (struct ib_pma_portsamplescontrol *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - unsigned long flags; - u8 port_select = p->port_select; - - memset(pmp->data, 0, sizeof(pmp->data)); - - p->port_select = port_select; - if (pmp->mad_hdr.attr_mod != 0 || - (port_select != port && port_select != 0xFF)) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - /* - * Ticks are 10x the link transfer period which for 2.5Gbs is 4 - * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample - * intervals are counted in ticks. Since we use Linux timers, that - * count in jiffies, we can't sample for less than 1000 ticks if HZ - * == 1000 (4000 ticks if HZ is 250). link_speed_active returns 2 for - * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that - * have hardware support for delaying packets. - */ - if (crp->cr_psstat) - p->tick = dev->dd->ipath_link_speed_active - 1; - else - p->tick = 250; /* 1 usec. */ - p->counter_width = 4; /* 32 bit counters */ - p->counter_mask0_9 = COUNTER_MASK0_9; - spin_lock_irqsave(&dev->pending_lock, flags); - if (crp->cr_psstat) - p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - p->sample_status = dev->pma_sample_status; - p->sample_start = cpu_to_be32(dev->pma_sample_start); - p->sample_interval = cpu_to_be32(dev->pma_sample_interval); - p->tag = cpu_to_be16(dev->pma_tag); - p->counter_select[0] = dev->pma_counter_select[0]; - p->counter_select[1] = dev->pma_counter_select[1]; - p->counter_select[2] = dev->pma_counter_select[2]; - p->counter_select[3] = dev->pma_counter_select[3]; - p->counter_select[4] = dev->pma_counter_select[4]; - spin_unlock_irqrestore(&dev->pending_lock, flags); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portsamplescontrol *p = - (struct ib_pma_portsamplescontrol *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - unsigned long flags; - u8 status; - int ret; - - if (pmp->mad_hdr.attr_mod != 0 || - (p->port_select != port && p->port_select != 0xFF)) { - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - spin_lock_irqsave(&dev->pending_lock, flags); - if (crp->cr_psstat) - status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - status = dev->pma_sample_status; - if (status == IB_PMA_SAMPLE_STATUS_DONE) { - dev->pma_sample_start = be32_to_cpu(p->sample_start); - dev->pma_sample_interval = be32_to_cpu(p->sample_interval); - dev->pma_tag = be16_to_cpu(p->tag); - dev->pma_counter_select[0] = p->counter_select[0]; - dev->pma_counter_select[1] = p->counter_select[1]; - dev->pma_counter_select[2] = p->counter_select[2]; - dev->pma_counter_select[3] = p->counter_select[3]; - dev->pma_counter_select[4] = p->counter_select[4]; - if (crp->cr_psstat) { - ipath_write_creg(dev->dd, crp->cr_psinterval, - dev->pma_sample_interval); - ipath_write_creg(dev->dd, crp->cr_psstart, - dev->pma_sample_start); - } else - dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - - ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); - -bail: - return ret; -} - -static u64 get_counter(struct ipath_ibdev *dev, - struct ipath_cregs const *crp, - __be16 sel) -{ - u64 ret; - - switch (sel) { - case IB_PMA_PORT_XMIT_DATA: - ret = (crp->cr_psxmitdatacount) ? - ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) : - dev->ipath_sword; - break; - case IB_PMA_PORT_RCV_DATA: - ret = (crp->cr_psrcvdatacount) ? - ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) : - dev->ipath_rword; - break; - case IB_PMA_PORT_XMIT_PKTS: - ret = (crp->cr_psxmitpktscount) ? - ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) : - dev->ipath_spkts; - break; - case IB_PMA_PORT_RCV_PKTS: - ret = (crp->cr_psrcvpktscount) ? - ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) : - dev->ipath_rpkts; - break; - case IB_PMA_PORT_XMIT_WAIT: - ret = (crp->cr_psxmitwaitcount) ? - ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) : - dev->ipath_xmit_wait; - break; - default: - ret = 0; - } - - return ret; -} - -static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp, - struct ib_device *ibdev) -{ - struct ib_pma_portsamplesresult *p = - (struct ib_pma_portsamplesresult *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - u8 status; - int i; - - memset(pmp->data, 0, sizeof(pmp->data)); - p->tag = cpu_to_be16(dev->pma_tag); - if (crp->cr_psstat) - status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - status = dev->pma_sample_status; - p->sample_status = cpu_to_be16(status); - for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : - cpu_to_be32( - get_counter(dev, crp, dev->pma_counter_select[i])); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, - struct ib_device *ibdev) -{ - struct ib_pma_portsamplesresult_ext *p = - (struct ib_pma_portsamplesresult_ext *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_cregs const *crp = dev->dd->ipath_cregs; - u8 status; - int i; - - memset(pmp->data, 0, sizeof(pmp->data)); - p->tag = cpu_to_be16(dev->pma_tag); - if (crp->cr_psstat) - status = ipath_read_creg32(dev->dd, crp->cr_psstat); - else - status = dev->pma_sample_status; - p->sample_status = cpu_to_be16(status); - /* 64 bits */ - p->extended_width = cpu_to_be32(0x80000000); - for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) - p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 : - cpu_to_be64( - get_counter(dev, crp, dev->pma_counter_select[i])); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_get_portcounters(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) - pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_verbs_counters cntrs; - u8 port_select = p->port_select; - - ipath_get_counters(dev->dd, &cntrs); - - /* Adjust counters for any resets done. */ - cntrs.symbol_error_counter -= dev->z_symbol_error_counter; - cntrs.link_error_recovery_counter -= - dev->z_link_error_recovery_counter; - cntrs.link_downed_counter -= dev->z_link_downed_counter; - cntrs.port_rcv_errors += dev->rcv_errors; - cntrs.port_rcv_errors -= dev->z_port_rcv_errors; - cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors; - cntrs.port_xmit_discards -= dev->z_port_xmit_discards; - cntrs.port_xmit_data -= dev->z_port_xmit_data; - cntrs.port_rcv_data -= dev->z_port_rcv_data; - cntrs.port_xmit_packets -= dev->z_port_xmit_packets; - cntrs.port_rcv_packets -= dev->z_port_rcv_packets; - cntrs.local_link_integrity_errors -= - dev->z_local_link_integrity_errors; - cntrs.excessive_buffer_overrun_errors -= - dev->z_excessive_buffer_overrun_errors; - cntrs.vl15_dropped -= dev->z_vl15_dropped; - cntrs.vl15_dropped += dev->n_vl15_dropped; - - memset(pmp->data, 0, sizeof(pmp->data)); - - p->port_select = port_select; - if (pmp->mad_hdr.attr_mod != 0 || - (port_select != port && port_select != 0xFF)) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - - if (cntrs.symbol_error_counter > 0xFFFFUL) - p->symbol_error_counter = cpu_to_be16(0xFFFF); - else - p->symbol_error_counter = - cpu_to_be16((u16)cntrs.symbol_error_counter); - if (cntrs.link_error_recovery_counter > 0xFFUL) - p->link_error_recovery_counter = 0xFF; - else - p->link_error_recovery_counter = - (u8)cntrs.link_error_recovery_counter; - if (cntrs.link_downed_counter > 0xFFUL) - p->link_downed_counter = 0xFF; - else - p->link_downed_counter = (u8)cntrs.link_downed_counter; - if (cntrs.port_rcv_errors > 0xFFFFUL) - p->port_rcv_errors = cpu_to_be16(0xFFFF); - else - p->port_rcv_errors = - cpu_to_be16((u16) cntrs.port_rcv_errors); - if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) - p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); - else - p->port_rcv_remphys_errors = - cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); - if (cntrs.port_xmit_discards > 0xFFFFUL) - p->port_xmit_discards = cpu_to_be16(0xFFFF); - else - p->port_xmit_discards = - cpu_to_be16((u16)cntrs.port_xmit_discards); - if (cntrs.local_link_integrity_errors > 0xFUL) - cntrs.local_link_integrity_errors = 0xFUL; - if (cntrs.excessive_buffer_overrun_errors > 0xFUL) - cntrs.excessive_buffer_overrun_errors = 0xFUL; - p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | - cntrs.excessive_buffer_overrun_errors; - if (cntrs.vl15_dropped > 0xFFFFUL) - p->vl15_dropped = cpu_to_be16(0xFFFF); - else - p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); - if (cntrs.port_xmit_data > 0xFFFFFFFFUL) - p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); - else - p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); - if (cntrs.port_rcv_data > 0xFFFFFFFFUL) - p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); - else - p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); - if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) - p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); - else - p->port_xmit_packets = - cpu_to_be32((u32)cntrs.port_xmit_packets); - if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) - p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); - else - p->port_rcv_packets = - cpu_to_be32((u32) cntrs.port_rcv_packets); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters_ext *p = - (struct ib_pma_portcounters_ext *)pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - u64 swords, rwords, spkts, rpkts, xwait; - u8 port_select = p->port_select; - - ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); - - /* Adjust counters for any resets done. */ - swords -= dev->z_port_xmit_data; - rwords -= dev->z_port_rcv_data; - spkts -= dev->z_port_xmit_packets; - rpkts -= dev->z_port_rcv_packets; - - memset(pmp->data, 0, sizeof(pmp->data)); - - p->port_select = port_select; - if (pmp->mad_hdr.attr_mod != 0 || - (port_select != port && port_select != 0xFF)) - pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; - - p->port_xmit_data = cpu_to_be64(swords); - p->port_rcv_data = cpu_to_be64(rwords); - p->port_xmit_packets = cpu_to_be64(spkts); - p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv); - - return reply((struct ib_smp *) pmp); -} - -static int recv_pma_set_portcounters(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) - pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_verbs_counters cntrs; - - /* - * Since the HW doesn't support clearing counters, we save the - * current count and subtract it from future responses. - */ - ipath_get_counters(dev->dd, &cntrs); - - if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) - dev->z_symbol_error_counter = cntrs.symbol_error_counter; - - if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY) - dev->z_link_error_recovery_counter = - cntrs.link_error_recovery_counter; - - if (p->counter_select & IB_PMA_SEL_LINK_DOWNED) - dev->z_link_downed_counter = cntrs.link_downed_counter; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS) - dev->z_port_rcv_errors = - cntrs.port_rcv_errors + dev->rcv_errors; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS) - dev->z_port_rcv_remphys_errors = - cntrs.port_rcv_remphys_errors; - - if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) - dev->z_port_xmit_discards = cntrs.port_xmit_discards; - - if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) - dev->z_local_link_integrity_errors = - cntrs.local_link_integrity_errors; - - if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) - dev->z_excessive_buffer_overrun_errors = - cntrs.excessive_buffer_overrun_errors; - - if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { - dev->n_vl15_dropped = 0; - dev->z_vl15_dropped = cntrs.vl15_dropped; - } - - if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) - dev->z_port_xmit_data = cntrs.port_xmit_data; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA) - dev->z_port_rcv_data = cntrs.port_rcv_data; - - if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS) - dev->z_port_xmit_packets = cntrs.port_xmit_packets; - - if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS) - dev->z_port_rcv_packets = cntrs.port_rcv_packets; - - return recv_pma_get_portcounters(pmp, ibdev, port); -} - -static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp, - struct ib_device *ibdev, u8 port) -{ - struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) - pmp->data; - struct ipath_ibdev *dev = to_idev(ibdev); - u64 swords, rwords, spkts, rpkts, xwait; - - ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); - - if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) - dev->z_port_xmit_data = swords; - - if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA) - dev->z_port_rcv_data = rwords; - - if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS) - dev->z_port_xmit_packets = spkts; - - if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) - dev->z_port_rcv_packets = rpkts; - - if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - dev->n_unicast_xmit = 0; - - if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - dev->n_unicast_rcv = 0; - - if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - dev->n_multicast_xmit = 0; - - if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - dev->n_multicast_rcv = 0; - - return recv_pma_get_portcounters_ext(pmp, ibdev, port); -} - -static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_mad *in_mad, - struct ib_mad *out_mad) -{ - struct ib_smp *smp = (struct ib_smp *)out_mad; - struct ipath_ibdev *dev = to_idev(ibdev); - int ret; - - *out_mad = *in_mad; - if (smp->class_version != 1) { - smp->status |= IB_SMP_UNSUP_VERSION; - ret = reply(smp); - goto bail; - } - - /* Is the mkey in the process of expiring? */ - if (dev->mkey_lease_timeout && - time_after_eq(jiffies, dev->mkey_lease_timeout)) { - /* Clear timeout and mkey protection field. */ - dev->mkey_lease_timeout = 0; - dev->mkeyprot = 0; - } - - /* - * M_Key checking depends on - * Portinfo:M_Key_protect_bits - */ - if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 && - dev->mkey != smp->mkey && - (smp->method == IB_MGMT_METHOD_SET || - (smp->method == IB_MGMT_METHOD_GET && - dev->mkeyprot >= 2))) { - if (dev->mkey_violations != 0xFFFF) - ++dev->mkey_violations; - if (dev->mkey_lease_timeout || - dev->mkey_lease_period == 0) { - ret = IB_MAD_RESULT_SUCCESS | - IB_MAD_RESULT_CONSUMED; - goto bail; - } - dev->mkey_lease_timeout = jiffies + - dev->mkey_lease_period * HZ; - /* Future: Generate a trap notice. */ - ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - goto bail; - } else if (dev->mkey_lease_timeout) - dev->mkey_lease_timeout = 0; - - switch (smp->method) { - case IB_MGMT_METHOD_GET: - switch (smp->attr_id) { - case IB_SMP_ATTR_NODE_DESC: - ret = recv_subn_get_nodedescription(smp, ibdev); - goto bail; - case IB_SMP_ATTR_NODE_INFO: - ret = recv_subn_get_nodeinfo(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_GUID_INFO: - ret = recv_subn_get_guidinfo(smp, ibdev); - goto bail; - case IB_SMP_ATTR_PORT_INFO: - ret = recv_subn_get_portinfo(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_PKEY_TABLE: - ret = recv_subn_get_pkeytable(smp, ibdev); - goto bail; - case IB_SMP_ATTR_SM_INFO: - if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { - ret = IB_MAD_RESULT_SUCCESS | - IB_MAD_RESULT_CONSUMED; - goto bail; - } - if (dev->port_cap_flags & IB_PORT_SM) { - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - } - /* FALLTHROUGH */ - default: - smp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply(smp); - goto bail; - } - - case IB_MGMT_METHOD_SET: - switch (smp->attr_id) { - case IB_SMP_ATTR_GUID_INFO: - ret = recv_subn_set_guidinfo(smp, ibdev); - goto bail; - case IB_SMP_ATTR_PORT_INFO: - ret = recv_subn_set_portinfo(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_PKEY_TABLE: - ret = recv_subn_set_pkeytable(smp, ibdev, port_num); - goto bail; - case IB_SMP_ATTR_SM_INFO: - if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { - ret = IB_MAD_RESULT_SUCCESS | - IB_MAD_RESULT_CONSUMED; - goto bail; - } - if (dev->port_cap_flags & IB_PORT_SM) { - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - } - /* FALLTHROUGH */ - default: - smp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply(smp); - goto bail; - } - - case IB_MGMT_METHOD_TRAP: - case IB_MGMT_METHOD_REPORT: - case IB_MGMT_METHOD_REPORT_RESP: - case IB_MGMT_METHOD_TRAP_REPRESS: - case IB_MGMT_METHOD_GET_RESP: - /* - * The ib_mad module will call us to process responses - * before checking for other consumers. - * Just tell the caller to process it normally. - */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - default: - smp->status |= IB_SMP_UNSUP_METHOD; - ret = reply(smp); - } - -bail: - return ret; -} - -static int process_perf(struct ib_device *ibdev, u8 port_num, - struct ib_mad *in_mad, - struct ib_mad *out_mad) -{ - struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; - int ret; - - *out_mad = *in_mad; - if (pmp->mad_hdr.class_version != 1) { - pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - switch (pmp->mad_hdr.method) { - case IB_MGMT_METHOD_GET: - switch (pmp->mad_hdr.attr_id) { - case IB_PMA_CLASS_PORT_INFO: - ret = recv_pma_get_classportinfo(pmp); - goto bail; - case IB_PMA_PORT_SAMPLES_CONTROL: - ret = recv_pma_get_portsamplescontrol(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_SAMPLES_RESULT: - ret = recv_pma_get_portsamplesresult(pmp, ibdev); - goto bail; - case IB_PMA_PORT_SAMPLES_RESULT_EXT: - ret = recv_pma_get_portsamplesresult_ext(pmp, - ibdev); - goto bail; - case IB_PMA_PORT_COUNTERS: - ret = recv_pma_get_portcounters(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_COUNTERS_EXT: - ret = recv_pma_get_portcounters_ext(pmp, ibdev, - port_num); - goto bail; - default: - pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - case IB_MGMT_METHOD_SET: - switch (pmp->mad_hdr.attr_id) { - case IB_PMA_PORT_SAMPLES_CONTROL: - ret = recv_pma_set_portsamplescontrol(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_COUNTERS: - ret = recv_pma_set_portcounters(pmp, ibdev, - port_num); - goto bail; - case IB_PMA_PORT_COUNTERS_EXT: - ret = recv_pma_set_portcounters_ext(pmp, ibdev, - port_num); - goto bail; - default: - pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) pmp); - goto bail; - } - - case IB_MGMT_METHOD_GET_RESP: - /* - * The ib_mad module will call us to process responses - * before checking for other consumers. - * Just tell the caller to process it normally. - */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - default: - pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; - ret = reply((struct ib_smp *) pmp); - } - -bail: - return ret; -} - -/** - * ipath_process_mad - process an incoming MAD packet - * @ibdev: the infiniband device this packet came in on - * @mad_flags: MAD flags - * @port_num: the port number this packet came in on - * @in_wc: the work completion entry for this packet - * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply - * - * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not - * interested in processing. - * - * Note that the verbs framework has already done the MAD sanity checks, - * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE - * MADs. - * - * This is called by the ib_mad module. - */ -int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - int ret; - - switch (in_mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: - case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port_num, - in_mad, out_mad); - goto bail; - case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port_num, in_mad, out_mad); - goto bail; - default: - ret = IB_MAD_RESULT_SUCCESS; - } - -bail: - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_mmap.c b/kernel/drivers/infiniband/hw/ipath/ipath_mmap.c deleted file mode 100644 index e73274229..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_mmap.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/errno.h> -#include <asm/pgtable.h> - -#include "ipath_verbs.h" - -/** - * ipath_release_mmap_info - free mmap info structure - * @ref: a pointer to the kref within struct ipath_mmap_info - */ -void ipath_release_mmap_info(struct kref *ref) -{ - struct ipath_mmap_info *ip = - container_of(ref, struct ipath_mmap_info, ref); - struct ipath_ibdev *dev = to_idev(ip->context->device); - - spin_lock_irq(&dev->pending_lock); - list_del(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - vfree(ip->obj); - kfree(ip); -} - -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it. - */ -static void ipath_vma_open(struct vm_area_struct *vma) -{ - struct ipath_mmap_info *ip = vma->vm_private_data; - - kref_get(&ip->ref); -} - -static void ipath_vma_close(struct vm_area_struct *vma) -{ - struct ipath_mmap_info *ip = vma->vm_private_data; - - kref_put(&ip->ref, ipath_release_mmap_info); -} - -static const struct vm_operations_struct ipath_vm_ops = { - .open = ipath_vma_open, - .close = ipath_vma_close, -}; - -/** - * ipath_mmap - create a new mmap region - * @context: the IB user context of the process making the mmap() call - * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. - */ -int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - struct ipath_ibdev *dev = to_idev(context->device); - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long size = vma->vm_end - vma->vm_start; - struct ipath_mmap_info *ip, *pp; - int ret = -EINVAL; - - /* - * Search the device's list of objects waiting for a mmap call. - * Normally, this list is very short since a call to create a - * CQ, QP, or SRQ is soon followed by a call to mmap(). - */ - spin_lock_irq(&dev->pending_lock); - list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, - pending_mmaps) { - /* Only the creator is allowed to mmap the object */ - if (context != ip->context || (__u64) offset != ip->offset) - continue; - /* Don't allow a mmap larger than the object. */ - if (size > ip->size) - break; - - list_del_init(&ip->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - - ret = remap_vmalloc_range(vma, ip->obj, 0); - if (ret) - goto done; - vma->vm_ops = &ipath_vm_ops; - vma->vm_private_data = ip; - ipath_vma_open(vma); - goto done; - } - spin_unlock_irq(&dev->pending_lock); -done: - return ret; -} - -/* - * Allocate information for ipath_mmap - */ -struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj) { - struct ipath_mmap_info *ip; - - ip = kmalloc(sizeof *ip, GFP_KERNEL); - if (!ip) - goto bail; - - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - INIT_LIST_HEAD(&ip->pending_mmaps); - ip->size = size; - ip->context = context; - ip->obj = obj; - kref_init(&ip->ref); - -bail: - return ip; -} - -void ipath_update_mmap_info(struct ipath_ibdev *dev, - struct ipath_mmap_info *ip, - u32 size, void *obj) { - size = PAGE_ALIGN(size); - - spin_lock_irq(&dev->mmap_offset_lock); - if (dev->mmap_offset == 0) - dev->mmap_offset = PAGE_SIZE; - ip->offset = dev->mmap_offset; - dev->mmap_offset += size; - spin_unlock_irq(&dev->mmap_offset_lock); - - ip->size = size; - ip->obj = obj; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_mr.c b/kernel/drivers/infiniband/hw/ipath/ipath_mr.c deleted file mode 100644 index c7278f6a8..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_mr.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/slab.h> - -#include <rdma/ib_umem.h> -#include <rdma/ib_pack.h> -#include <rdma/ib_smi.h> - -#include "ipath_verbs.h" - -/* Fast memory region */ -struct ipath_fmr { - struct ib_fmr ibfmr; - u8 page_shift; - struct ipath_mregion mr; /* must be last */ -}; - -static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct ipath_fmr, ibfmr); -} - -/** - * ipath_get_dma_mr - get a DMA memory region - * @pd: protection domain for this memory region - * @acc: access flags - * - * Returns the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the - * struct ib_dma_mapping_ops functions (see ipath_dma.c). - */ -struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) -{ - struct ipath_mr *mr; - struct ib_mr *ret; - - mr = kzalloc(sizeof *mr, GFP_KERNEL); - if (!mr) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - mr->mr.access_flags = acc; - ret = &mr->ibmr; - -bail: - return ret; -} - -static struct ipath_mr *alloc_mr(int count, - struct ipath_lkey_table *lk_table) -{ - struct ipath_mr *mr; - int m, i = 0; - - /* Allocate struct plus pointers to first level page tables. */ - m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ; - mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); - if (!mr) - goto done; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); - if (!mr->mr.map[i]) - goto bail; - } - mr->mr.mapsz = m; - - /* - * ib_reg_phys_mr() will initialize mr->ibmr except for - * lkey and rkey. - */ - if (!ipath_alloc_lkey(lk_table, &mr->mr)) - goto bail; - mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey; - - goto done; - -bail: - while (i) { - i--; - kfree(mr->mr.map[i]); - } - kfree(mr); - mr = NULL; - -done: - return mr; -} - -/** - * ipath_reg_phys_mr - register a physical memory region - * @pd: protection domain for this memory region - * @buffer_list: pointer to the list of physical buffers to register - * @num_phys_buf: the number of physical buffers to register - * @iova_start: the starting address passed over IB which maps to this MR - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - struct ipath_mr *mr; - int n, m, i; - struct ib_mr *ret; - - mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); - if (mr == NULL) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - mr->mr.pd = pd; - mr->mr.user_base = *iova_start; - mr->mr.iova = *iova_start; - mr->mr.length = 0; - mr->mr.offset = 0; - mr->mr.access_flags = acc; - mr->mr.max_segs = num_phys_buf; - mr->umem = NULL; - - m = 0; - n = 0; - for (i = 0; i < num_phys_buf; i++) { - mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; - mr->mr.map[m]->segs[n].length = buffer_list[i].size; - mr->mr.length += buffer_list[i].size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } - } - - ret = &mr->ibmr; - -bail: - return ret; -} - -/** - * ipath_reg_user_mr - register a userspace memory region - * @pd: protection domain for this memory region - * @start: starting userspace address - * @length: length of region to register - * @virt_addr: virtual address to use (from HCA's point of view) - * @mr_access_flags: access flags for this memory region - * @udata: unused by the InfiniPath driver - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata) -{ - struct ipath_mr *mr; - struct ib_umem *umem; - int n, m, entry; - struct scatterlist *sg; - struct ib_mr *ret; - - if (length == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(umem)) - return (void *) umem; - - n = umem->nmap; - mr = alloc_mr(n, &to_idev(pd->device)->lk_table); - if (!mr) { - ret = ERR_PTR(-ENOMEM); - ib_umem_release(umem); - goto bail; - } - - mr->mr.pd = pd; - mr->mr.user_base = start; - mr->mr.iova = virt_addr; - mr->mr.length = length; - mr->mr.offset = ib_umem_offset(umem); - mr->mr.access_flags = mr_access_flags; - mr->mr.max_segs = n; - mr->umem = umem; - - m = 0; - n = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - void *vaddr; - - vaddr = page_address(sg_page(sg)); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } - } - ret = &mr->ibmr; - -bail: - return ret; -} - -/** - * ipath_dereg_mr - unregister and free a memory region - * @ibmr: the memory region to free - * - * Returns 0 on success. - * - * Note that this is called to free MRs created by ipath_get_dma_mr() - * or ipath_reg_user_mr(). - */ -int ipath_dereg_mr(struct ib_mr *ibmr) -{ - struct ipath_mr *mr = to_imr(ibmr); - int i; - - ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey); - i = mr->mr.mapsz; - while (i) { - i--; - kfree(mr->mr.map[i]); - } - - if (mr->umem) - ib_umem_release(mr->umem); - - kfree(mr); - return 0; -} - -/** - * ipath_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Returns the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ipath_fmr *fmr; - int m, i = 0; - struct ib_fmr *ret; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ; - fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); - if (!fmr) - goto bail; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], - GFP_KERNEL); - if (!fmr->mr.map[i]) - goto bail; - } - fmr->mr.mapsz = m; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) - goto bail; - fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ - fmr->mr.pd = pd; - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - fmr->mr.offset = 0; - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; - goto done; - -bail: - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - ret = ERR_PTR(-ENOMEM); - -done: - return ret; -} - -/** - * ipath_map_phys_fmr - set up a fast memory region - * @ibmfr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region - * - * This may be called from interrupt context. - */ - -int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, - int list_len, u64 iova) -{ - struct ipath_fmr *fmr = to_ifmr(ibfmr); - struct ipath_lkey_table *rkt; - unsigned long flags; - int m, n, i; - u32 ps; - int ret; - - if (list_len > fmr->mr.max_segs) { - ret = -EINVAL; - goto bail; - } - rkt = &to_idev(ibfmr->device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - ps = 1 << fmr->page_shift; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - if (++n == IPATH_SEGSZ) { - m++; - n = 0; - } - } - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; - -bail: - return ret; -} - -/** - * ipath_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Returns 0 on success. - */ -int ipath_unmap_fmr(struct list_head *fmr_list) -{ - struct ipath_fmr *fmr; - struct ipath_lkey_table *rkt; - unsigned long flags; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rkt = &to_idev(fmr->ibfmr.device)->lk_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * ipath_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Returns 0 on success. - */ -int ipath_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct ipath_fmr *fmr = to_ifmr(ibfmr); - int i; - - ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey); - i = fmr->mr.mapsz; - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - return 0; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_qp.c b/kernel/drivers/infiniband/hw/ipath/ipath_qp.c deleted file mode 100644 index face87602..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_qp.c +++ /dev/null @@ -1,1080 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/err.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) -#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \ - (off)) -#define find_next_offset(map, off) find_next_zero_bit((map)->page, \ - BITS_PER_PAGE, off) - -/* - * Convert the AETH credit code into the number of credits. - */ -static u32 credit_table[31] = { - 0, /* 0 */ - 1, /* 1 */ - 2, /* 2 */ - 3, /* 3 */ - 4, /* 4 */ - 6, /* 5 */ - 8, /* 6 */ - 12, /* 7 */ - 16, /* 8 */ - 24, /* 9 */ - 32, /* A */ - 48, /* B */ - 64, /* C */ - 96, /* D */ - 128, /* E */ - 192, /* F */ - 256, /* 10 */ - 384, /* 11 */ - 512, /* 12 */ - 768, /* 13 */ - 1024, /* 14 */ - 1536, /* 15 */ - 2048, /* 16 */ - 3072, /* 17 */ - 4096, /* 18 */ - 6144, /* 19 */ - 8192, /* 1A */ - 12288, /* 1B */ - 16384, /* 1C */ - 24576, /* 1D */ - 32768 /* 1E */ -}; - - -static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) -{ - unsigned long page = get_zeroed_page(GFP_KERNEL); - unsigned long flags; - - /* - * Free the page if someone raced with us installing it. - */ - - spin_lock_irqsave(&qpt->lock, flags); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock_irqrestore(&qpt->lock, flags); -} - - -static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) -{ - u32 i, offset, max_scan, qpn; - struct qpn_map *map; - u32 ret = -1; - - if (type == IB_QPT_SMI) - ret = 0; - else if (type == IB_QPT_GSI) - ret = 1; - - if (ret != -1) { - map = &qpt->map[0]; - if (unlikely(!map->page)) { - get_map_page(qpt, map); - if (unlikely(!map->page)) { - ret = -ENOMEM; - goto bail; - } - } - if (!test_and_set_bit(ret, map->page)) - atomic_dec(&map->n_free); - else - ret = -EBUSY; - goto bail; - } - - qpn = qpt->last + 1; - if (qpn >= QPN_MAX) - qpn = 2; - offset = qpn & BITS_PER_PAGE_MASK; - map = &qpt->map[qpn / BITS_PER_PAGE]; - max_scan = qpt->nmaps - !offset; - for (i = 0;;) { - if (unlikely(!map->page)) { - get_map_page(qpt, map); - if (unlikely(!map->page)) - break; - } - if (likely(atomic_read(&map->n_free))) { - do { - if (!test_and_set_bit(offset, map->page)) { - atomic_dec(&map->n_free); - qpt->last = qpn; - ret = qpn; - goto bail; - } - offset = find_next_offset(map, offset); - qpn = mk_qpn(qpt, map, offset); - /* - * This test differs from alloc_pidmap(). - * If find_next_offset() does find a zero - * bit, we don't need to check for QPN - * wrapping around past our starting QPN. - * We just need to be sure we don't loop - * forever. - */ - } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); - } - /* - * In order to keep the number of pages allocated to a - * minimum, we scan the all existing pages before increasing - * the size of the bitmap table. - */ - if (++i > max_scan) { - if (qpt->nmaps == QPNMAP_ENTRIES) - break; - map = &qpt->map[qpt->nmaps++]; - offset = 0; - } else if (map < &qpt->map[qpt->nmaps]) { - ++map; - offset = 0; - } else { - map = &qpt->map[0]; - offset = 2; - } - qpn = mk_qpn(qpt, map, offset); - } - - ret = -ENOMEM; - -bail: - return ret; -} - -static void free_qpn(struct ipath_qp_table *qpt, u32 qpn) -{ - struct qpn_map *map; - - map = qpt->map + qpn / BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); - atomic_inc(&map->n_free); -} - -/** - * ipath_alloc_qpn - allocate a QP number - * @qpt: the QP table - * @qp: the QP - * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special) - * - * Allocate the next available QPN and put the QP into the hash table. - * The hash table holds a reference to the QP. - */ -static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, - enum ib_qp_type type) -{ - unsigned long flags; - int ret; - - ret = alloc_qpn(qpt, type); - if (ret < 0) - goto bail; - qp->ibqp.qp_num = ret; - - /* Add the QP to the hash table. */ - spin_lock_irqsave(&qpt->lock, flags); - - ret %= qpt->max; - qp->next = qpt->table[ret]; - qpt->table[ret] = qp; - atomic_inc(&qp->refcount); - - spin_unlock_irqrestore(&qpt->lock, flags); - ret = 0; - -bail: - return ret; -} - -/** - * ipath_free_qp - remove a QP from the QP table - * @qpt: the QP table - * @qp: the QP to remove - * - * Remove the QP from the table so it can't be found asynchronously by - * the receive interrupt routine. - */ -static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) -{ - struct ipath_qp *q, **qpp; - unsigned long flags; - - spin_lock_irqsave(&qpt->lock, flags); - - /* Remove QP from the hash table. */ - qpp = &qpt->table[qp->ibqp.qp_num % qpt->max]; - for (; (q = *qpp) != NULL; qpp = &q->next) { - if (q == qp) { - *qpp = qp->next; - qp->next = NULL; - atomic_dec(&qp->refcount); - break; - } - } - - spin_unlock_irqrestore(&qpt->lock, flags); -} - -/** - * ipath_free_all_qps - check for QPs still in use - * @qpt: the QP table to empty - * - * There should not be any QPs still in use. - * Free memory for table. - */ -unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) -{ - unsigned long flags; - struct ipath_qp *qp; - u32 n, qp_inuse = 0; - - spin_lock_irqsave(&qpt->lock, flags); - for (n = 0; n < qpt->max; n++) { - qp = qpt->table[n]; - qpt->table[n] = NULL; - - for (; qp; qp = qp->next) - qp_inuse++; - } - spin_unlock_irqrestore(&qpt->lock, flags); - - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) - if (qpt->map[n].page) - free_page((unsigned long) qpt->map[n].page); - return qp_inuse; -} - -/** - * ipath_lookup_qpn - return the QP with the given QPN - * @qpt: the QP table - * @qpn: the QP number to look up - * - * The caller is responsible for decrementing the QP reference count - * when done. - */ -struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) -{ - unsigned long flags; - struct ipath_qp *qp; - - spin_lock_irqsave(&qpt->lock, flags); - - for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) { - if (qp->ibqp.qp_num == qpn) { - atomic_inc(&qp->refcount); - break; - } - } - - spin_unlock_irqrestore(&qpt->lock, flags); - return qp; -} - -/** - * ipath_reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset - * @type: the QP type - */ -static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) -{ - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; - atomic_set(&qp->s_dma_busy, 0); - qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; - qp->s_hdrwords = 0; - qp->s_wqe = NULL; - qp->s_pkt_delay = 0; - qp->s_draining = 0; - qp->s_psn = 0; - qp->r_psn = 0; - qp->r_msn = 0; - if (type == IB_QPT_RC) { - qp->s_state = IB_OPCODE_RC_SEND_LAST; - qp->r_state = IB_OPCODE_RC_SEND_LAST; - } else { - qp->s_state = IB_OPCODE_UC_SEND_LAST; - qp->r_state = IB_OPCODE_UC_SEND_LAST; - } - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_nak_state = 0; - qp->r_aflags = 0; - qp->r_flags = 0; - qp->s_rnr_timeout = 0; - qp->s_head = 0; - qp->s_tail = 0; - qp->s_cur = 0; - qp->s_last = 0; - qp->s_ssn = 1; - qp->s_lsn = 0; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); - qp->r_head_ack_queue = 0; - qp->s_tail_ack_queue = 0; - qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } -} - -/** - * ipath_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. - * The QP s_lock should be held and interrupts disabled. - * If we are already in error state, just return. - */ - -int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ib_wc wc; - int ret = 0; - - if (qp->state == IB_QPS_ERR) - goto bail; - - qp->state = IB_QPS_ERR; - - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - /* Schedule the sending tasklet to drain the send work queue. */ - if (qp->s_last != qp->s_head) - ipath_schedule_send(qp); - - memset(&wc, 0, sizeof(wc)); - wc.qp = &qp->ibqp; - wc.opcode = IB_WC_RECV; - - if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { - wc.wr_id = qp->r_wr_id; - wc.status = err; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - } - wc.status = IB_WC_WR_FLUSH_ERR; - - if (qp->r_rq.wq) { - struct ipath_rwq *wq; - u32 head; - u32 tail; - - spin_lock(&qp->r_rq.lock); - - /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - while (tail != head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; - if (++tail >= qp->r_rq.size) - tail = 0; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); - } else if (qp->ibqp.event_handler) - ret = 1; - -bail: - return ret; -} - -/** - * ipath_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for ipathverbs.so - * - * Returns 0 on success, otherwise returns an errno. - */ -int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibqp->device); - struct ipath_qp *qp = to_iqp(ibqp); - enum ib_qp_state cur_state, new_state; - int lastwqe = 0; - int ret; - - spin_lock_irq(&qp->s_lock); - - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : qp->state; - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) - goto inval; - - if (attr_mask & IB_QP_AV) { - if (attr->ah_attr.dlid == 0 || - attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) - goto inval; - - if ((attr->ah_attr.ah_flags & IB_AH_GRH) && - (attr->ah_attr.grh.sgid_index > 1)) - goto inval; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) - goto inval; - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - if (attr->min_rnr_timer > 31) - goto inval; - - if (attr_mask & IB_QP_PORT) - if (attr->port_num == 0 || - attr->port_num > ibqp->device->phys_port_cnt) - goto inval; - - /* - * don't allow invalid Path MTU values or greater than 2048 - * unless we are configured for a 4KB MTU - */ - if ((attr_mask & IB_QP_PATH_MTU) && - (ib_mtu_enum_to_int(attr->path_mtu) == -1 || - (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096))) - goto inval; - - if (attr_mask & IB_QP_PATH_MIG_STATE) - if (attr->path_mig_state != IB_MIG_MIGRATED && - attr->path_mig_state != IB_MIG_REARM) - goto inval; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) - goto inval; - - switch (new_state) { - case IB_QPS_RESET: - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - qp->s_flags &= ~IPATH_S_ANY_WAIT; - spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet */ - tasklet_kill(&qp->s_task); - wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); - spin_lock_irq(&qp->s_lock); - } - ipath_reset_qp(qp, ibqp->qp_type); - break; - - case IB_QPS_SQD: - qp->s_draining = qp->s_last != qp->s_cur; - qp->state = new_state; - break; - - case IB_QPS_SQE: - if (qp->ibqp.qp_type == IB_QPT_RC) - goto inval; - qp->state = new_state; - break; - - case IB_QPS_ERR: - lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); - break; - - default: - qp->state = new_state; - break; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - qp->s_pkey_index = attr->pkey_index; - - if (attr_mask & IB_QP_DEST_QPN) - qp->remote_qpn = attr->dest_qp_num; - - if (attr_mask & IB_QP_SQ_PSN) { - qp->s_psn = qp->s_next_psn = attr->sq_psn; - qp->s_last_psn = qp->s_next_psn - 1; - } - - if (attr_mask & IB_QP_RQ_PSN) - qp->r_psn = attr->rq_psn; - - if (attr_mask & IB_QP_ACCESS_FLAGS) - qp->qp_access_flags = attr->qp_access_flags; - - if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; - qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); - } - - if (attr_mask & IB_QP_PATH_MTU) - qp->path_mtu = attr->path_mtu; - - if (attr_mask & IB_QP_RETRY_CNT) - qp->s_retry = qp->s_retry_cnt = attr->retry_cnt; - - if (attr_mask & IB_QP_RNR_RETRY) { - qp->s_rnr_retry = attr->rnr_retry; - if (qp->s_rnr_retry > 7) - qp->s_rnr_retry = 7; - qp->s_rnr_retry_cnt = qp->s_rnr_retry; - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - qp->r_min_rnr_timer = attr->min_rnr_timer; - - if (attr_mask & IB_QP_TIMEOUT) - qp->timeout = attr->timeout; - - if (attr_mask & IB_QP_QKEY) - qp->qkey = attr->qkey; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - qp->r_max_rd_atomic = attr->max_dest_rd_atomic; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - qp->s_max_rd_atomic = attr->max_rd_atomic; - - spin_unlock_irq(&qp->s_lock); - - if (lastwqe) { - struct ib_event ev; - - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } - ret = 0; - goto bail; - -inval: - spin_unlock_irq(&qp->s_lock); - ret = -EINVAL; - -bail: - return ret; -} - -int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr) -{ - struct ipath_qp *qp = to_iqp(ibqp); - - attr->qp_state = qp->state; - attr->cur_qp_state = attr->qp_state; - attr->path_mtu = qp->path_mtu; - attr->path_mig_state = 0; - attr->qkey = qp->qkey; - attr->rq_psn = qp->r_psn; - attr->sq_psn = qp->s_next_psn; - attr->dest_qp_num = qp->remote_qpn; - attr->qp_access_flags = qp->qp_access_flags; - attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; - attr->cap.max_send_sge = qp->s_max_sge; - attr->cap.max_recv_sge = qp->r_rq.max_sge; - attr->cap.max_inline_data = 0; - attr->ah_attr = qp->remote_ah_attr; - memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr)); - attr->pkey_index = qp->s_pkey_index; - attr->alt_pkey_index = 0; - attr->en_sqd_async_notify = 0; - attr->sq_draining = qp->s_draining; - attr->max_rd_atomic = qp->s_max_rd_atomic; - attr->max_dest_rd_atomic = qp->r_max_rd_atomic; - attr->min_rnr_timer = qp->r_min_rnr_timer; - attr->port_num = 1; - attr->timeout = qp->timeout; - attr->retry_cnt = qp->s_retry_cnt; - attr->rnr_retry = qp->s_rnr_retry_cnt; - attr->alt_port_num = 0; - attr->alt_timeout = 0; - - init_attr->event_handler = qp->ibqp.event_handler; - init_attr->qp_context = qp->ibqp.qp_context; - init_attr->send_cq = qp->ibqp.send_cq; - init_attr->recv_cq = qp->ibqp.recv_cq; - init_attr->srq = qp->ibqp.srq; - init_attr->cap = attr->cap; - if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) - init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - else - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->qp_type = qp->ibqp.qp_type; - init_attr->port_num = 1; - return 0; -} - -/** - * ipath_compute_aeth - compute the AETH (syndrome + MSN) - * @qp: the queue pair to compute the AETH for - * - * Returns the AETH. - */ -__be32 ipath_compute_aeth(struct ipath_qp *qp) -{ - u32 aeth = qp->r_msn & IPATH_MSN_MASK; - - if (qp->ibqp.srq) { - /* - * Shared receive queues don't generate credits. - * Set the credit field to the invalid value. - */ - aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT; - } else { - u32 min, max, x; - u32 credits; - struct ipath_rwq *wq = qp->r_rq.wq; - u32 head; - u32 tail; - - /* sanity check pointers before trusting them */ - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - /* - * Compute the number of credits available (RWQEs). - * XXX Not holding the r_rq.lock here so there is a small - * chance that the pair of reads are not atomic. - */ - credits = head - tail; - if ((int)credits < 0) - credits += qp->r_rq.size; - /* - * Binary search the credit table to find the code to - * use. - */ - min = 0; - max = 31; - for (;;) { - x = (min + max) / 2; - if (credit_table[x] == credits) - break; - if (credit_table[x] > credits) - max = x; - else if (min == x) - break; - else - min = x; - } - aeth |= x << IPATH_AETH_CREDIT_SHIFT; - } - return cpu_to_be32(aeth); -} - -/** - * ipath_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: unused by InfiniPath - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. - */ -struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) -{ - struct ipath_qp *qp; - int err; - struct ipath_swqe *swq = NULL; - struct ipath_ibdev *dev; - size_t sz; - size_t sg_list_sz; - struct ib_qp *ret; - - if (init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - if (init_attr->cap.max_send_sge > ib_ipath_max_sges || - init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - /* Check receive queue parameters if no SRQ is specified. */ - if (!init_attr->srq) { - if (init_attr->cap.max_recv_sge > ib_ipath_max_sges || - init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - if (init_attr->cap.max_send_sge + - init_attr->cap.max_send_wr + - init_attr->cap.max_recv_sge + - init_attr->cap.max_recv_wr == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - } - - switch (init_attr->qp_type) { - case IB_QPT_UC: - case IB_QPT_RC: - case IB_QPT_UD: - case IB_QPT_SMI: - case IB_QPT_GSI: - sz = sizeof(struct ipath_sge) * - init_attr->cap.max_send_sge + - sizeof(struct ipath_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); - if (swq == NULL) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - sz = sizeof(*qp); - sg_list_sz = 0; - if (init_attr->srq) { - struct ipath_srq *srq = to_isrq(init_attr->srq); - - if (srq->rq.max_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (srq->rq.max_sge - 1); - } else if (init_attr->cap.max_recv_sge > 1) - sg_list_sz = sizeof(*qp->r_sg_list) * - (init_attr->cap.max_recv_sge - 1); - qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); - if (!qp) { - ret = ERR_PTR(-ENOMEM); - goto bail_swq; - } - if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || - init_attr->qp_type == IB_QPT_SMI || - init_attr->qp_type == IB_QPT_GSI)) { - qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); - if (!qp->r_ud_sg_list) { - ret = ERR_PTR(-ENOMEM); - goto bail_qp; - } - } else - qp->r_ud_sg_list = NULL; - if (init_attr->srq) { - sz = 0; - qp->r_rq.size = 0; - qp->r_rq.max_sge = 0; - qp->r_rq.wq = NULL; - init_attr->cap.max_recv_wr = 0; - init_attr->cap.max_recv_sge = 0; - } else { - qp->r_rq.size = init_attr->cap.max_recv_wr + 1; - qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + - sizeof(struct ipath_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + - qp->r_rq.size * sz); - if (!qp->r_rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_sg_list; - } - } - - /* - * ib_create_qp() will initialize qp->ibqp - * except for qp->ibqp.qp_num. - */ - spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); - atomic_set(&qp->refcount, 0); - init_waitqueue_head(&qp->wait); - init_waitqueue_head(&qp->wait_dma); - tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); - INIT_LIST_HEAD(&qp->piowait); - INIT_LIST_HEAD(&qp->timerwait); - qp->state = IB_QPS_RESET; - qp->s_wq = swq; - qp->s_size = init_attr->cap.max_send_wr + 1; - qp->s_max_sge = init_attr->cap.max_send_sge; - if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = IPATH_S_SIGNAL_REQ_WR; - else - qp->s_flags = 0; - dev = to_idev(ibpd->device); - err = ipath_alloc_qpn(&dev->qp_table, qp, - init_attr->qp_type); - if (err) { - ret = ERR_PTR(err); - vfree(qp->r_rq.wq); - goto bail_sg_list; - } - qp->ip = NULL; - qp->s_tx = NULL; - ipath_reset_qp(qp, init_attr->qp_type); - break; - - default: - /* Don't support raw QPs */ - ret = ERR_PTR(-ENOSYS); - goto bail; - } - - init_attr->cap.max_inline_data = 0; - - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - if (!qp->r_rq.wq) { - __u64 offset = 0; - - err = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else { - u32 s = sizeof(struct ipath_rwq) + - qp->r_rq.size * sz; - - qp->ip = - ipath_create_mmap_info(dev, s, - ibpd->uobject->context, - qp->r_rq.wq); - if (!qp->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - err = ib_copy_to_udata(udata, &(qp->ip->offset), - sizeof(qp->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } - } - - spin_lock(&dev->n_qps_lock); - if (dev->n_qps_allocated == ib_ipath_max_qps) { - spin_unlock(&dev->n_qps_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_qps_allocated++; - spin_unlock(&dev->n_qps_lock); - - if (qp->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - - ret = &qp->ibqp; - goto bail; - -bail_ip: - if (qp->ip) - kref_put(&qp->ip->ref, ipath_release_mmap_info); - else - vfree(qp->r_rq.wq); - ipath_free_qp(&dev->qp_table, qp); - free_qpn(&dev->qp_table, qp->ibqp.qp_num); -bail_sg_list: - kfree(qp->r_ud_sg_list); -bail_qp: - kfree(qp); -bail_swq: - vfree(swq); -bail: - return ret; -} - -/** - * ipath_destroy_qp - destroy a queue pair - * @ibqp: the queue pair to destroy - * - * Returns 0 on success. - * - * Note that this can be called while the QP is actively sending or - * receiving! - */ -int ipath_destroy_qp(struct ib_qp *ibqp) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_ibdev *dev = to_idev(ibqp->device); - - /* Make sure HW and driver activity is stopped. */ - spin_lock_irq(&qp->s_lock); - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - qp->s_flags &= ~IPATH_S_ANY_WAIT; - spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet */ - tasklet_kill(&qp->s_task); - wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); - } else - spin_unlock_irq(&qp->s_lock); - - ipath_free_qp(&dev->qp_table, qp); - - if (qp->s_tx) { - atomic_dec(&qp->refcount); - if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) - kfree(qp->s_tx->txreq.map_addr); - spin_lock_irq(&dev->pending_lock); - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); - spin_unlock_irq(&dev->pending_lock); - qp->s_tx = NULL; - } - - wait_event(qp->wait, !atomic_read(&qp->refcount)); - - /* all user's cleaned up, mark it available */ - free_qpn(&dev->qp_table, qp->ibqp.qp_num); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); - - if (qp->ip) - kref_put(&qp->ip->ref, ipath_release_mmap_info); - else - vfree(qp->r_rq.wq); - kfree(qp->r_ud_sg_list); - vfree(qp->s_wq); - kfree(qp); - return 0; -} - -/** - * ipath_init_qp_table - initialize the QP table for a device - * @idev: the device who's QP table we're initializing - * @size: the size of the QP table - * - * Returns 0 on success, otherwise returns an errno. - */ -int ipath_init_qp_table(struct ipath_ibdev *idev, int size) -{ - int i; - int ret; - - idev->qp_table.last = 1; /* QPN 0 and 1 are special. */ - idev->qp_table.max = size; - idev->qp_table.nmaps = 1; - idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table), - GFP_KERNEL); - if (idev->qp_table.table == NULL) { - ret = -ENOMEM; - goto bail; - } - - for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) { - atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE); - idev->qp_table.map[i].page = NULL; - } - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush - * @aeth: the Acknowledge Extended Transport Header - * - * The QP s_lock should be held. - */ -void ipath_get_credit(struct ipath_qp *qp, u32 aeth) -{ - u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK; - - /* - * If the credit is invalid, we can send - * as many packets as we like. Otherwise, we have to - * honor the credit field. - */ - if (credit == IPATH_AETH_CREDIT_INVAL) - qp->s_lsn = (u32) -1; - else if (qp->s_lsn != (u32) -1) { - /* Compute new LSN (i.e., MSN + credit) */ - credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK; - if (ipath_cmp24(credit, qp->s_lsn) > 0) - qp->s_lsn = credit; - } - - /* Restart sending if it was blocked due to lack of credits. */ - if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && - qp->s_cur != qp->s_head && - (qp->s_lsn == (u32) -1 || - ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, - qp->s_lsn + 1) <= 0)) - ipath_schedule_send(qp); -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_rc.c b/kernel/drivers/infiniband/hw/ipath/ipath_rc.c deleted file mode 100644 index 79b3dbc97..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_rc.c +++ /dev/null @@ -1,1969 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/io.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_RC_##x - -static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, - u32 psn, u32 pmtu) -{ - u32 len; - - len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; - ss->sge = wqe->sg_list[0]; - ss->sg_list = wqe->sg_list + 1; - ss->num_sge = wqe->wr.num_sge; - ipath_skip_sge(ss, len); - return wqe->length - len; -} - -/** - * ipath_init_restart- initialize the qp->s_sge after a restart - * @qp: the QP who's SGE we're restarting - * @wqe: the work queue to initialize the QP's SGE from - * - * The QP s_lock should be held and interrupts disabled. - */ -static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) -{ - struct ipath_ibdev *dev; - - qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, - ib_mtu_enum_to_int(qp->path_mtu)); - dev = to_idev(qp->ibqp.device); - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); -} - -/** - * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) - * @qp: a pointer to the QP - * @ohdr: a pointer to the IB header being constructed - * @pmtu: the path MTU - * - * Return 1 if constructed; otherwise, return 0. - * Note that we are in the responder's side of the QP context. - * Note the QP s_lock must be held. - */ -static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, - struct ipath_other_headers *ohdr, u32 pmtu) -{ - struct ipath_ack_entry *e; - u32 hwords; - u32 len; - u32 bth0; - u32 bth2; - - /* Don't send an ACK if we aren't supposed to. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto bail; - - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; - - switch (qp->s_ack_state) { - case OP(RDMA_READ_RESPONSE_LAST): - case OP(RDMA_READ_RESPONSE_ONLY): - case OP(ATOMIC_ACKNOWLEDGE): - /* - * We can increment the tail pointer now that the last - * response has been sent instead of only being - * constructed. - */ - if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) - qp->s_tail_ack_queue = 0; - /* FALLTHROUGH */ - case OP(SEND_ONLY): - case OP(ACKNOWLEDGE): - /* Check for no next entry in the queue. */ - if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { - if (qp->s_flags & IPATH_S_ACK_PENDING) - goto normal; - qp->s_ack_state = OP(ACKNOWLEDGE); - goto bail; - } - - e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST)) { - /* Copy SGE state in case we need to resend */ - qp->s_ack_rdma_sge = e->rdma_sge; - qp->s_cur_sge = &qp->s_ack_rdma_sge; - len = e->rdma_sge.sge.sge_length; - if (len > pmtu) { - len = pmtu; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else { - qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); - e->sent = 1; - } - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; - qp->s_ack_rdma_psn = e->psn; - bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; - } else { - /* COMPARE_SWAP or FETCH_ADD */ - qp->s_cur_sge = NULL; - len = 0; - qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); - ohdr->u.at.aeth = ipath_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); - hwords += sizeof(ohdr->u.at) / sizeof(u32); - bth2 = e->psn; - e->sent = 1; - } - bth0 = qp->s_ack_state << 24; - break; - - case OP(RDMA_READ_RESPONSE_FIRST): - qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); - /* FALLTHROUGH */ - case OP(RDMA_READ_RESPONSE_MIDDLE): - len = qp->s_ack_rdma_sge.sge.sge_length; - if (len > pmtu) - len = pmtu; - else { - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1; - } - bth0 = qp->s_ack_state << 24; - bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; - break; - - default: - normal: - /* - * Send a regular ACK. - * Set the s_ack_state so we wait until after sending - * the ACK before setting s_ack_state to ACKNOWLEDGE - * (see above). - */ - qp->s_ack_state = OP(SEND_ONLY); - qp->s_flags &= ~IPATH_S_ACK_PENDING; - qp->s_cur_sge = NULL; - if (qp->s_nak_state) - ohdr->u.aeth = - cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->s_nak_state << - IPATH_AETH_CREDIT_SHIFT)); - else - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; - len = 0; - bth0 = OP(ACKNOWLEDGE) << 24; - bth2 = qp->s_ack_psn & IPATH_PSN_MASK; - } - qp->s_hdrwords = hwords; - qp->s_cur_size = len; - ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2); - return 1; - -bail: - return 0; -} - -/** - * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) - * @qp: a pointer to the QP - * - * Return 1 if constructed; otherwise, return 0. - */ -int ipath_make_rc_req(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_other_headers *ohdr; - struct ipath_sge_state *ss; - struct ipath_swqe *wqe; - u32 hwords; - u32 len; - u32 bth0; - u32 bth2; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - char newreq; - unsigned long flags; - int ret = 0; - - ohdr = &qp->s_hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; - - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout resends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - - /* Sending responses has higher priority over sending requests. */ - if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || - (qp->s_flags & IPATH_S_ACK_PENDING) || - qp->s_ack_state != OP(ACKNOWLEDGE)) && - ipath_make_rc_ack(dev, qp, ohdr, pmtu)) - goto done; - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { - if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - wqe = get_swqe_ptr(qp, qp->s_last); - ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; - } - - /* Leave BUSY set until RNR timeout. */ - if (qp->s_rnr_timeout) { - qp->s_flags |= IPATH_S_WAITING; - goto bail; - } - - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; - bth0 = 1 << 22; /* Set M bit */ - - /* Send a request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); - switch (qp->s_state) { - default: - if (!(ib_ipath_state_ops[qp->state] & - IPATH_PROCESS_NEXT_SEND_OK)) - goto bail; - /* - * Resend an old request or start a new one. - * - * We keep track of the current SWQE so that - * we don't reset the "furthest progress" state - * if we need to back up. - */ - newreq = 0; - if (qp->s_cur == qp->s_tail) { - /* Check if send work queue is empty. */ - if (qp->s_tail == qp->s_head) - goto bail; - /* - * If a fence is requested, wait for previous - * RDMA read and atomic operations to finish. - */ - if ((wqe->wr.send_flags & IB_SEND_FENCE) && - qp->s_num_rd_atomic) { - qp->s_flags |= IPATH_S_FENCE_PENDING; - goto bail; - } - wqe->psn = qp->s_next_psn; - newreq = 1; - } - /* - * Note that we have to be careful not to modify the - * original work request since we may need to resend - * it. - */ - len = wqe->length; - ss = &qp->s_sge; - bth2 = 0; - switch (wqe->wr.opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - /* If no credit, return. */ - if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; - goto bail; - } - wqe->lpsn = wqe->psn; - if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; - qp->s_state = OP(SEND_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_ONLY); - else { - qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - bth2 = 1 << 31; /* Request ACK. */ - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_RDMA_WRITE: - if (newreq && qp->s_lsn != (u32) -1) - qp->s_lsn++; - /* FALLTHROUGH */ - case IB_WR_RDMA_WRITE_WITH_IMM: - /* If no credit, return. */ - if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; - goto bail; - } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; - if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; - qp->s_state = OP(RDMA_WRITE_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_ONLY); - else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - bth2 = 1 << 31; /* Request ACK. */ - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_RDMA_READ: - /* - * Don't allow more operations to be started - * than the QP limits allow. - */ - if (newreq) { - if (qp->s_num_rd_atomic >= - qp->s_max_rd_atomic) { - qp->s_flags |= IPATH_S_RDMAR_PENDING; - goto bail; - } - qp->s_num_rd_atomic++; - if (qp->s_lsn != (u32) -1) - qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. - */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; - } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - ss = NULL; - len = 0; - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - /* - * Don't allow more operations to be started - * than the QP limits allow. - */ - if (newreq) { - if (qp->s_num_rd_atomic >= - qp->s_max_rd_atomic) { - qp->s_flags |= IPATH_S_RDMAR_PENDING; - goto bail; - } - qp->s_num_rd_atomic++; - if (qp->s_lsn != (u32) -1) - qp->s_lsn++; - wqe->lpsn = wqe->psn; - } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - } else { - qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - ohdr->u.atomic_eth.compare_data = 0; - } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); - ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); - hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); - ss = NULL; - len = 0; - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - - default: - goto bail; - } - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - qp->s_len = wqe->length; - if (newreq) { - qp->s_tail++; - if (qp->s_tail >= qp->s_size) - qp->s_tail = 0; - } - bth2 |= qp->s_psn & IPATH_PSN_MASK; - if (wqe->wr.opcode == IB_WR_RDMA_READ) - qp->s_psn = wqe->lpsn + 1; - else { - qp->s_psn++; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } - /* - * Put the QP on the pending list so lost ACKs will cause - * a retry. More than one request can be pending so the - * QP may already be on the dev->pending list. - */ - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - break; - - case OP(RDMA_READ_RESPONSE_FIRST): - /* - * This case can only happen if a send is restarted. - * See ipath_restart_rc(). - */ - ipath_init_restart(qp, wqe); - /* FALLTHROUGH */ - case OP(SEND_FIRST): - qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - ss = &qp->s_sge; - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_LAST); - else { - qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - bth2 |= 1 << 31; /* Request ACK. */ - qp->s_cur++; - if (qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case OP(RDMA_READ_RESPONSE_LAST): - /* - * This case can only happen if a RDMA write is restarted. - * See ipath_restart_rc(). - */ - ipath_init_restart(qp, wqe); - /* FALLTHROUGH */ - case OP(RDMA_WRITE_FIRST): - qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ - case OP(RDMA_WRITE_MIDDLE): - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - ss = &qp->s_sge; - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_LAST); - else { - qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - bth2 |= 1 << 31; /* Request ACK. */ - qp->s_cur++; - if (qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case OP(RDMA_READ_RESPONSE_MIDDLE): - /* - * This case can only happen if a RDMA read is restarted. - * See ipath_restart_rc(). - */ - ipath_init_restart(qp, wqe); - len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - bth2 = qp->s_psn & IPATH_PSN_MASK; - qp->s_psn = wqe->lpsn + 1; - ss = NULL; - len = 0; - qp->s_cur++; - if (qp->s_cur == qp->s_size) - qp->s_cur = 0; - break; - } - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) - bth2 |= 1 << 31; /* Request ACK. */ - qp->s_len -= len; - qp->s_hdrwords = hwords; - qp->s_cur_sge = ss; - qp->s_cur_size = len; - ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); -done: - ret = 1; - goto unlock; - -bail: - qp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * send_rc_ack - Construct an ACK packet and send it - * @qp: a pointer to the QP - * - * This is called from ipath_rc_rcv() and only uses the receive - * side QP state. - * Note that RDMA reads and atomics are handled in the - * send side QP state and tasklet. - */ -static void send_rc_ack(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_devdata *dd; - u16 lrh0; - u32 bth0; - u32 hwords; - u32 __iomem *piobuf; - struct ipath_ib_header hdr; - struct ipath_other_headers *ohdr; - unsigned long flags; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ - if (qp->r_head_ack_queue != qp->s_tail_ack_queue || - (qp->s_flags & IPATH_S_ACK_PENDING) || - qp->s_ack_state != OP(ACKNOWLEDGE)) - goto queue_ack; - - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Don't try to send ACKs if the link isn't ACTIVE */ - dd = dev->dd; - if (!(dd->ipath_flags & IPATH_LINKACTIVE)) - goto done; - - piobuf = ipath_getpiobuf(dd, 0, NULL); - if (!piobuf) { - /* - * We are out of PIO buffers at the moment. - * Pass responsibility for sending the ACK to the - * send tasklet so that when a PIO buffer becomes - * available, the ACK is sent ahead of other outgoing - * packets. - */ - spin_lock_irqsave(&qp->s_lock, flags); - goto queue_ack; - } - - /* Construct the header. */ - ohdr = &hdr.u.oth; - lrh0 = IPATH_LRH_BTH; - /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */ - hwords = 6; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - hwords += ipath_make_grh(dev, &hdr.u.l.grh, - &qp->remote_ah_attr.grh, - hwords, 0); - ohdr = &hdr.u.l.oth; - lrh0 = IPATH_LRH_GRH; - } - /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_get_pkey(dd, qp->s_pkey_index) | - (OP(ACKNOWLEDGE) << 24) | (1 << 22); - if (qp->r_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->r_nak_state << - IPATH_AETH_CREDIT_SHIFT)); - else - ohdr->u.aeth = ipath_compute_aeth(qp); - lrh0 |= qp->remote_ah_attr.sl << 4; - hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | - qp->remote_ah_attr.src_path_bits); - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); - - writeq(hwords + 1, piobuf); - - if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { - u32 *hdrp = (u32 *) &hdr; - - ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, hdrp, hwords - 1); - ipath_flush_wc(); - __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1); - } else - __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords); - - ipath_flush_wc(); - - dev->n_unicast_xmit++; - goto done; - -queue_ack: - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - - /* Schedule the send tasklet. */ - ipath_schedule_send(qp); - } - spin_unlock_irqrestore(&qp->s_lock, flags); -done: - return; -} - -/** - * reset_psn - reset the QP state to send starting from PSN - * @qp: the QP - * @psn: the packet sequence number to restart at - * - * This is called from ipath_rc_rcv() to process an incoming RC ACK - * for the given QP. - * Called at interrupt level with the QP s_lock held. - */ -static void reset_psn(struct ipath_qp *qp, u32 psn) -{ - u32 n = qp->s_last; - struct ipath_swqe *wqe = get_swqe_ptr(qp, n); - u32 opcode; - - qp->s_cur = n; - - /* - * If we are starting the request from the beginning, - * let the normal send code handle initialization. - */ - if (ipath_cmp24(psn, wqe->psn) <= 0) { - qp->s_state = OP(SEND_LAST); - goto done; - } - - /* Find the work request opcode corresponding to the given PSN. */ - opcode = wqe->wr.opcode; - for (;;) { - int diff; - - if (++n == qp->s_size) - n = 0; - if (n == qp->s_tail) - break; - wqe = get_swqe_ptr(qp, n); - diff = ipath_cmp24(psn, wqe->psn); - if (diff < 0) - break; - qp->s_cur = n; - /* - * If we are starting the request from the beginning, - * let the normal send code handle initialization. - */ - if (diff == 0) { - qp->s_state = OP(SEND_LAST); - goto done; - } - opcode = wqe->wr.opcode; - } - - /* - * Set the state to restart in the middle of a request. - * Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_make_rc_req(). - */ - switch (opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); - break; - - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - qp->s_state = OP(RDMA_READ_RESPONSE_LAST); - break; - - case IB_WR_RDMA_READ: - qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); - break; - - default: - /* - * This case shouldn't happen since its only - * one PSN per req. - */ - qp->s_state = OP(SEND_LAST); - } -done: - qp->s_psn = psn; -} - -/** - * ipath_restart_rc - back up requester to resend the last un-ACKed request - * @qp: the QP to restart - * @psn: packet sequence number for the request - * @wc: the work completion request - * - * The QP s_lock should be held and interrupts disabled. - */ -void ipath_restart_rc(struct ipath_qp *qp, u32 psn) -{ - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - struct ipath_ibdev *dev; - - if (qp->s_retry == 0) { - ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); - goto bail; - } - qp->s_retry--; - - /* - * Remove the QP from the timeout queue. - * Note: it may already have been removed by ipath_ib_timer(). - */ - dev = to_idev(qp->ibqp.device); - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - if (wqe->wr.opcode == IB_WR_RDMA_READ) - dev->n_rc_resends++; - else - dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; - - reset_psn(qp, psn); - ipath_schedule_send(qp); - -bail: - return; -} - -static inline void update_last_psn(struct ipath_qp *qp, u32 psn) -{ - qp->s_last_psn = psn; -} - -/** - * do_rc_ack - process an incoming RC ACK - * @qp: the QP the ACK came in on - * @psn: the packet sequence number of the ACK - * @opcode: the opcode of the request that resulted in the ACK - * - * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK - * for the given QP. - * Called at interrupt level with the QP s_lock held and interrupts disabled. - * Returns 1 if OK, 0 if current operation should be aborted (NAK). - */ -static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, - u64 val) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ib_wc wc; - enum ib_wc_status status; - struct ipath_swqe *wqe; - int ret = 0; - u32 ack_psn; - int diff; - - /* - * Remove the QP from the timeout queue (or RNR timeout queue). - * If ipath_ib_timer() has already removed it, - * it's OK since we hold the QP s_lock and ipath_restart_rc() - * just won't find anything to restart if we ACK everything. - */ - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - spin_unlock(&dev->pending_lock); - - /* - * Note that NAKs implicitly ACK outstanding SEND and RDMA write - * requests and implicitly NAK RDMA read and atomic requests issued - * before the NAK'ed request. The MSN won't include the NAK'ed - * request but will include an ACK'ed request(s). - */ - ack_psn = psn; - if (aeth >> 29) - ack_psn--; - wqe = get_swqe_ptr(qp, qp->s_last); - - /* - * The MSN might be for a later WQE than the PSN indicates so - * only complete WQEs that the PSN finishes. - */ - while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) { - /* - * RDMA_READ_RESPONSE_ONLY is a special case since - * we want to generate completion events for everything - * before the RDMA read, copy the data, then generate - * the completion for the read. - */ - if (wqe->wr.opcode == IB_WR_RDMA_READ && - opcode == OP(RDMA_READ_RESPONSE_ONLY) && - diff == 0) { - ret = 1; - goto bail; - } - /* - * If this request is a RDMA read or atomic, and the ACK is - * for a later operation, this ACK NAKs the RDMA read or - * atomic. In other words, only a RDMA_READ_LAST or ONLY - * can ACK a RDMA read and likewise for atomic ops. Note - * that the NAK case can only happen if relaxed ordering is - * used and requests are sent after an RDMA read or atomic - * is sent but before the response is received. - */ - if ((wqe->wr.opcode == IB_WR_RDMA_READ && - (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || - ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && - (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { - /* - * The last valid PSN seen is the previous - * request's. - */ - update_last_psn(qp, wqe->psn - 1); - /* Retry this request. */ - ipath_restart_rc(qp, wqe->psn); - /* - * No need to process the ACK/NAK since we are - * restarting an earlier request. - */ - goto bail; - } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - *(u64 *) wqe->sg_list[0].vaddr = val; - if (qp->s_num_rd_atomic && - (wqe->wr.opcode == IB_WR_RDMA_READ || - wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { - qp->s_num_rd_atomic--; - /* Restart sending task if fence is complete */ - if (((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) || - qp->s_flags & IPATH_S_RDMAR_PENDING) - ipath_schedule_send(qp); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof wc); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); - } - qp->s_retry = qp->s_retry_cnt; - /* - * If we are completing a request which is in the process of - * being resent, we can stop resending it since we know the - * responder has already seen it. - */ - if (qp->s_last == qp->s_cur) { - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - qp->s_last = qp->s_cur; - if (qp->s_last == qp->s_tail) - break; - wqe = get_swqe_ptr(qp, qp->s_cur); - qp->s_state = OP(SEND_LAST); - qp->s_psn = wqe->psn; - } else { - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) - qp->s_draining = 0; - if (qp->s_last == qp->s_tail) - break; - wqe = get_swqe_ptr(qp, qp->s_last); - } - } - - switch (aeth >> 29) { - case 0: /* ACK */ - dev->n_rc_acks++; - /* If this is a partial ACK, reset the retransmit timer. */ - if (qp->s_last != qp->s_tail) { - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - /* - * If we get a partial ACK for a resent operation, - * we can stop resending the earlier packets and - * continue with the next packet the receiver wants. - */ - if (ipath_cmp24(qp->s_psn, psn) <= 0) { - reset_psn(qp, psn + 1); - ipath_schedule_send(qp); - } - } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { - qp->s_state = OP(SEND_LAST); - qp->s_psn = psn + 1; - } - ipath_get_credit(qp, aeth); - qp->s_rnr_retry = qp->s_rnr_retry_cnt; - qp->s_retry = qp->s_retry_cnt; - update_last_psn(qp, psn); - ret = 1; - goto bail; - - case 1: /* RNR NAK */ - dev->n_rnr_naks++; - if (qp->s_last == qp->s_tail) - goto bail; - if (qp->s_rnr_retry == 0) { - status = IB_WC_RNR_RETRY_EXC_ERR; - goto class_b; - } - if (qp->s_rnr_retry_cnt < 7) - qp->s_rnr_retry--; - - /* The last valid PSN is the previous PSN. */ - update_last_psn(qp, psn - 1); - - if (wqe->wr.opcode == IB_WR_RDMA_READ) - dev->n_rc_resends++; - else - dev->n_rc_resends += - (qp->s_psn - psn) & IPATH_PSN_MASK; - - reset_psn(qp, psn); - - qp->s_rnr_timeout = - ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & - IPATH_AETH_CREDIT_MASK]; - ipath_insert_rnr_queue(qp); - ipath_schedule_send(qp); - goto bail; - - case 3: /* NAK */ - if (qp->s_last == qp->s_tail) - goto bail; - /* The last valid PSN is the previous PSN. */ - update_last_psn(qp, psn - 1); - switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & - IPATH_AETH_CREDIT_MASK) { - case 0: /* PSN sequence error */ - dev->n_seq_naks++; - /* - * Back up to the responder's expected PSN. - * Note that we might get a NAK in the middle of an - * RDMA READ response which terminates the RDMA - * READ. - */ - ipath_restart_rc(qp, psn); - break; - - case 1: /* Invalid Request */ - status = IB_WC_REM_INV_REQ_ERR; - dev->n_other_naks++; - goto class_b; - - case 2: /* Remote Access Error */ - status = IB_WC_REM_ACCESS_ERR; - dev->n_other_naks++; - goto class_b; - - case 3: /* Remote Operation Error */ - status = IB_WC_REM_OP_ERR; - dev->n_other_naks++; - class_b: - ipath_send_complete(qp, wqe, status); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); - break; - - default: - /* Ignore other reserved NAK error codes */ - goto reserved; - } - qp->s_rnr_retry = qp->s_rnr_retry_cnt; - goto bail; - - default: /* 2: reserved */ - reserved: - /* Ignore reserved NAK codes. */ - goto bail; - } - -bail: - return ret; -} - -/** - * ipath_rc_rcv_resp - process an incoming RC response packet - * @dev: the device this packet came in on - * @ohdr: the other headers for this packet - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet - * @opcode: the opcode for this packet - * @psn: the packet sequence number for this packet - * @hdrsize: the header length - * @pmtu: the path MTU - * @header_in_data: true if part of the header data is in the data buffer - * - * This is called from ipath_rc_rcv() to process an incoming RC response - * packet for the given QP. - * Called at interrupt level. - */ -static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, - struct ipath_other_headers *ohdr, - void *data, u32 tlen, - struct ipath_qp *qp, - u32 opcode, - u32 psn, u32 hdrsize, u32 pmtu, - int header_in_data) -{ - struct ipath_swqe *wqe; - enum ib_wc_status status; - unsigned long flags; - int diff; - u32 pad; - u32 aeth; - u64 val; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Double check we can process this now that we hold the s_lock. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto ack_done; - - /* Ignore invalid responses. */ - if (ipath_cmp24(psn, qp->s_next_psn) >= 0) - goto ack_done; - - /* Ignore duplicate responses. */ - diff = ipath_cmp24(psn, qp->s_last_psn); - if (unlikely(diff <= 0)) { - /* Update credits for "ghost" ACKs */ - if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else { - aeth = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } - if ((aeth >> 29) == 0) - ipath_get_credit(qp, aeth); - } - goto ack_done; - } - - if (unlikely(qp->s_last == qp->s_tail)) - goto ack_done; - wqe = get_swqe_ptr(qp, qp->s_last); - status = IB_WC_SUCCESS; - - switch (opcode) { - case OP(ACKNOWLEDGE): - case OP(ATOMIC_ACKNOWLEDGE): - case OP(RDMA_READ_RESPONSE_FIRST): - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else { - aeth = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - if (!header_in_data) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64) be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else - val = be64_to_cpu(((__be64 *) data)[0]); - } else - val = 0; - if (!do_rc_ack(qp, aeth, psn, opcode, val) || - opcode != OP(RDMA_READ_RESPONSE_FIRST)) - goto ack_done; - hdrsize += 4; - wqe = get_swqe_ptr(qp, qp->s_last); - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; - qp->r_flags &= ~IPATH_R_RDMAR_SEQ; - /* - * If this is a response to a resent RDMA read, we - * have to be careful to copy the data to the right - * location. - */ - qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, - wqe, psn, pmtu); - goto read_middle; - - case OP(RDMA_READ_RESPONSE_MIDDLE): - /* no AETH, no ACK */ - if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { - dev->n_rdma_seq++; - if (qp->r_flags & IPATH_R_RDMAR_SEQ) - goto ack_done; - qp->r_flags |= IPATH_R_RDMAR_SEQ; - ipath_restart_rc(qp, qp->s_last_psn + 1); - goto ack_done; - } - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; - read_middle: - if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto ack_len_err; - if (unlikely(pmtu >= qp->s_rdma_read_len)) - goto ack_len_err; - - /* We got a response so update the timeout. */ - spin_lock(&dev->pending_lock); - if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) - list_move_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - - if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) - qp->s_retry = qp->s_retry_cnt; - - /* - * Update the RDMA receive state but do the copy w/o - * holding the locks and blocking interrupts. - */ - qp->s_rdma_read_len -= pmtu; - update_last_psn(qp, psn); - spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); - goto bail; - - case OP(RDMA_READ_RESPONSE_ONLY): - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else - aeth = be32_to_cpu(((__be32 *) data)[0]); - if (!do_rc_ack(qp, aeth, psn, opcode, 0)) - goto ack_done; - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* - * Check that the data size is >= 0 && <= pmtu. - * Remember to account for the AETH header (4) and - * ICRC (4). - */ - if (unlikely(tlen < (hdrsize + pad + 8))) - goto ack_len_err; - /* - * If this is a response to a resent RDMA read, we - * have to be careful to copy the data to the right - * location. - */ - wqe = get_swqe_ptr(qp, qp->s_last); - qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, - wqe, psn, pmtu); - goto read_last; - - case OP(RDMA_READ_RESPONSE_LAST): - /* ACKs READ req. */ - if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { - dev->n_rdma_seq++; - if (qp->r_flags & IPATH_R_RDMAR_SEQ) - goto ack_done; - qp->r_flags |= IPATH_R_RDMAR_SEQ; - ipath_restart_rc(qp, qp->s_last_psn + 1); - goto ack_done; - } - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* - * Check that the data size is >= 1 && <= pmtu. - * Remember to account for the AETH header (4) and - * ICRC (4). - */ - if (unlikely(tlen <= (hdrsize + pad + 8))) - goto ack_len_err; - read_last: - tlen -= hdrsize + pad + 8; - if (unlikely(tlen != qp->s_rdma_read_len)) - goto ack_len_err; - if (!header_in_data) - aeth = be32_to_cpu(ohdr->u.aeth); - else { - aeth = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } - ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); - (void) do_rc_ack(qp, aeth, psn, - OP(RDMA_READ_RESPONSE_LAST), 0); - goto ack_done; - } - -ack_op_err: - status = IB_WC_LOC_QP_OP_ERR; - goto ack_err; - -ack_len_err: - status = IB_WC_LOC_LEN_ERR; -ack_err: - ipath_send_complete(qp, wqe, status); - ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); -ack_done: - spin_unlock_irqrestore(&qp->s_lock, flags); -bail: - return; -} - -/** - * ipath_rc_rcv_error - process an incoming duplicate or error RC packet - * @dev: the device this packet came in on - * @ohdr: the other headers for this packet - * @data: the packet data - * @qp: the QP for this packet - * @opcode: the opcode for this packet - * @psn: the packet sequence number for this packet - * @diff: the difference between the PSN and the expected PSN - * @header_in_data: true if part of the header data is in the data buffer - * - * This is called from ipath_rc_rcv() to process an unexpected - * incoming RC packet for the given QP. - * Called at interrupt level. - * Return 1 if no more processing is needed; otherwise return 0 to - * schedule a response to be sent. - */ -static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, - struct ipath_other_headers *ohdr, - void *data, - struct ipath_qp *qp, - u32 opcode, - u32 psn, - int diff, - int header_in_data) -{ - struct ipath_ack_entry *e; - u8 i, prev; - int old_req; - unsigned long flags; - - if (diff > 0) { - /* - * Packet sequence error. - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if we already sent one. - */ - if (!qp->r_nak_state) { - qp->r_nak_state = IB_NAK_PSN_ERROR; - /* Use the expected PSN. */ - qp->r_ack_psn = qp->r_psn; - goto send_ack; - } - goto done; - } - - /* - * Handle a duplicate request. Don't re-execute SEND, RDMA - * write or atomic op. Don't NAK errors, just silently drop - * the duplicate request. Note that r_sge, r_len, and - * r_rcv_len may be in use so don't modify them. - * - * We are supposed to ACK the earliest duplicate PSN but we - * can coalesce an outstanding duplicate ACK. We have to - * send the earliest so that RDMA reads can be restarted at - * the requester's expected PSN. - * - * First, find where this duplicate PSN falls within the - * ACKs previously sent. - */ - psn &= IPATH_PSN_MASK; - e = NULL; - old_req = 1; - - spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this now that we hold the s_lock. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock_done; - - for (i = qp->r_head_ack_queue; ; i = prev) { - if (i == qp->s_tail_ack_queue) - old_req = 0; - if (i) - prev = i - 1; - else - prev = IPATH_MAX_RDMA_ATOMIC; - if (prev == qp->r_head_ack_queue) { - e = NULL; - break; - } - e = &qp->s_ack_queue[prev]; - if (!e->opcode) { - e = NULL; - break; - } - if (ipath_cmp24(psn, e->psn) >= 0) { - if (prev == qp->s_tail_ack_queue) - old_req = 0; - break; - } - } - switch (opcode) { - case OP(RDMA_READ_REQUEST): { - struct ib_reth *reth; - u32 offset; - u32 len; - - /* - * If we didn't find the RDMA read request in the ack queue, - * or the send tasklet is already backed up to send an - * earlier entry, we can ignore this request. - */ - if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) - goto unlock_done; - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - /* - * Address range must be a subset of the original - * request and start on pmtu boundaries. - * We reuse the old ack_queue slot since the requester - * should not back up and request an earlier PSN for the - * same request. - */ - offset = ((psn - e->psn) & IPATH_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); - len = be32_to_cpu(reth->length); - if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) - goto unlock_done; - if (len != 0) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - ok = ipath_rkey_ok(qp, &e->rdma_sge, - len, vaddr, rkey, - IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) - goto unlock_done; - } else { - e->rdma_sge.sg_list = NULL; - e->rdma_sge.num_sge = 0; - e->rdma_sge.sge.mr = NULL; - e->rdma_sge.sge.vaddr = NULL; - e->rdma_sge.sge.length = 0; - e->rdma_sge.sge.sge_length = 0; - } - e->psn = psn; - qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_tail_ack_queue = prev; - break; - } - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): { - /* - * If we didn't find the atomic request in the ack queue - * or the send tasklet is already backed up to send an - * earlier entry, we can ignore this request. - */ - if (!e || e->opcode != (u8) opcode || old_req) - goto unlock_done; - qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_tail_ack_queue = prev; - break; - } - - default: - if (old_req) - goto unlock_done; - /* - * Resend the most recent ACK if this request is - * after all the previous RDMA reads and atomics. - */ - if (i == qp->r_head_ack_queue) { - spin_unlock_irqrestore(&qp->s_lock, flags); - qp->r_nak_state = 0; - qp->r_ack_psn = qp->r_psn - 1; - goto send_ack; - } - /* - * Try to send a simple ACK to work around a Mellanox bug - * which doesn't accept a RDMA read response or atomic - * response as an ACK for earlier SENDs or RDMA writes. - */ - if (qp->r_head_ack_queue == qp->s_tail_ack_queue && - !(qp->s_flags & IPATH_S_ACK_PENDING) && - qp->s_ack_state == OP(ACKNOWLEDGE)) { - spin_unlock_irqrestore(&qp->s_lock, flags); - qp->r_nak_state = 0; - qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; - goto send_ack; - } - /* - * Resend the RDMA read or atomic op which - * ACKs this duplicate request. - */ - qp->s_ack_state = OP(ACKNOWLEDGE); - qp->s_tail_ack_queue = i; - break; - } - qp->r_nak_state = 0; - ipath_schedule_send(qp); - -unlock_done: - spin_unlock_irqrestore(&qp->s_lock, flags); -done: - return 1; - -send_ack: - return 0; -} - -void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) -{ - unsigned long flags; - int lastwqe; - - spin_lock_irqsave(&qp->s_lock, flags); - lastwqe = ipath_error_qp(qp, err); - spin_unlock_irqrestore(&qp->s_lock, flags); - - if (lastwqe) { - struct ib_event ev; - - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } -} - -static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) -{ - unsigned next; - - next = n + 1; - if (next > IPATH_MAX_RDMA_ATOMIC) - next = 0; - if (n == qp->s_tail_ack_queue) { - qp->s_tail_ack_queue = next; - qp->s_ack_state = OP(ACKNOWLEDGE); - } -} - -/** - * ipath_rc_rcv - process an incoming RC packet - * @dev: the device this packet came in on - * @hdr: the header of this packet - * @has_grh: true if the header has a GRH - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet - * - * This is called from ipath_qp_rcv() to process an incoming RC packet - * for the given QP. - * Called at interrupt level. - */ -void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - u32 opcode; - u32 hdrsize; - u32 psn; - u32 pad; - struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - int diff; - struct ib_reth *reth; - int header_in_data; - unsigned long flags; - - /* Validate the SLID. See Ch. 9.6.1.5 */ - if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) - goto done; - - /* Check for GRH */ - if (!has_grh) { - ohdr = &hdr->u.oth; - hdrsize = 8 + 12; /* LRH + BTH */ - psn = be32_to_cpu(ohdr->bth[2]); - header_in_data = 0; - } else { - ohdr = &hdr->u.l.oth; - hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ - /* - * The header with GRH is 60 bytes and the core driver sets - * the eager header buffer size to 56 bytes so the last 4 - * bytes of the BTH header (PSN) is in the data buffer. - */ - header_in_data = dev->dd->ipath_rcvhdrentsize == 16; - if (header_in_data) { - psn = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } else - psn = be32_to_cpu(ohdr->bth[2]); - } - - /* - * Process responses (ACKs) before anything else. Note that the - * packet sequence number will be for something in the send work - * queue rather than the expected receive packet sequence number. - * In other words, this QP is the requester. - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && - opcode <= OP(ATOMIC_ACKNOWLEDGE)) { - ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, - hdrsize, pmtu, header_in_data); - goto done; - } - - /* Compute 24 bits worth of difference. */ - diff = ipath_cmp24(psn, qp->r_psn); - if (unlikely(diff)) { - if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, - psn, diff, header_in_data)) - goto done; - goto send_ack; - } - - /* Check for opcode sequence errors. */ - switch (qp->r_state) { - case OP(SEND_FIRST): - case OP(SEND_MIDDLE): - if (opcode == OP(SEND_MIDDLE) || - opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) - break; - goto nack_inv; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_MIDDLE): - if (opcode == OP(RDMA_WRITE_MIDDLE) || - opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) - break; - goto nack_inv; - - default: - if (opcode == OP(SEND_MIDDLE) || - opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE) || - opcode == OP(RDMA_WRITE_MIDDLE) || - opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) - goto nack_inv; - /* - * Note that it is up to the requester to not send a new - * RDMA read or atomic operation before receiving an ACK - * for the previous operation. - */ - break; - } - - memset(&wc, 0, sizeof wc); - - /* OK, process the packet. */ - switch (opcode) { - case OP(SEND_FIRST): - if (!ipath_get_rwqe(qp, 0)) - goto rnr_nak; - qp->r_rcv_len = 0; - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - case OP(RDMA_WRITE_MIDDLE): - send_middle: - /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto nack_inv; - qp->r_rcv_len += pmtu; - if (unlikely(qp->r_rcv_len > qp->r_len)) - goto nack_inv; - ipath_copy_sge(&qp->r_sge, data, pmtu); - break; - - case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): - /* consume RWQE */ - if (!ipath_get_rwqe(qp, 1)) - goto rnr_nak; - goto send_last_imm; - - case OP(SEND_ONLY): - case OP(SEND_ONLY_WITH_IMMEDIATE): - if (!ipath_get_rwqe(qp, 0)) - goto rnr_nak; - qp->r_rcv_len = 0; - if (opcode == OP(SEND_ONLY)) - goto send_last; - /* FALLTHROUGH */ - case OP(SEND_LAST_WITH_IMMEDIATE): - send_last_imm: - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else { - /* Immediate data comes after BTH */ - wc.ex.imm_data = ohdr->u.imm_data; - } - hdrsize += 4; - wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ - case OP(SEND_LAST): - case OP(RDMA_WRITE_LAST): - send_last: - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) - goto nack_inv; - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - wc.byte_len = tlen + qp->r_rcv_len; - if (unlikely(wc.byte_len > qp->r_len)) - goto nack_inv; - ipath_copy_sge(&qp->r_sge, data, tlen); - qp->r_msn++; - if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) - break; - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || - opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - else - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(1 << 23)) != 0); - break; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_ONLY): - case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) - goto nack_inv; - /* consume RWQE */ - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - hdrsize += sizeof(*reth); - qp->r_len = be32_to_cpu(reth->length); - qp->r_rcv_len = 0; - if (qp->r_len != 0) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &qp->r_sge, - qp->r_len, vaddr, rkey, - IB_ACCESS_REMOTE_WRITE); - if (unlikely(!ok)) - goto nack_acc; - } else { - qp->r_sge.sg_list = NULL; - qp->r_sge.sge.mr = NULL; - qp->r_sge.sge.vaddr = NULL; - qp->r_sge.sge.length = 0; - qp->r_sge.sge.sge_length = 0; - } - if (opcode == OP(RDMA_WRITE_FIRST)) - goto send_middle; - else if (opcode == OP(RDMA_WRITE_ONLY)) - goto send_last; - if (!ipath_get_rwqe(qp, 1)) - goto rnr_nak; - goto send_last_imm; - - case OP(RDMA_READ_REQUEST): { - struct ipath_ack_entry *e; - u32 len; - u8 next; - - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto nack_inv; - next = qp->r_head_ack_queue + 1; - if (next > IPATH_MAX_RDMA_ATOMIC) - next = 0; - spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this while holding the s_lock. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; - if (unlikely(next == qp->s_tail_ack_queue)) { - if (!qp->s_ack_queue[next].sent) - goto nack_inv_unlck; - ipath_update_ack_queue(qp, next); - } - e = &qp->s_ack_queue[qp->r_head_ack_queue]; - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - len = be32_to_cpu(reth->length); - if (len) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, - rkey, IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) - goto nack_acc_unlck; - /* - * Update the next expected PSN. We add 1 later - * below, so only add the remainder here. - */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; - } else { - e->rdma_sge.sg_list = NULL; - e->rdma_sge.num_sge = 0; - e->rdma_sge.sge.mr = NULL; - e->rdma_sge.sge.vaddr = NULL; - e->rdma_sge.sge.length = 0; - e->rdma_sge.sge.sge_length = 0; - } - e->opcode = opcode; - e->sent = 0; - e->psn = psn; - /* - * We need to increment the MSN here instead of when we - * finish sending the result since a duplicate request would - * increment it more than once. - */ - qp->r_msn++; - qp->r_psn++; - qp->r_state = opcode; - qp->r_nak_state = 0; - qp->r_head_ack_queue = next; - - /* Schedule the send tasklet. */ - ipath_schedule_send(qp); - - goto unlock; - } - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): { - struct ib_atomic_eth *ateth; - struct ipath_ack_entry *e; - u64 vaddr; - atomic64_t *maddr; - u64 sdata; - u32 rkey; - u8 next; - - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_inv; - next = qp->r_head_ack_queue + 1; - if (next > IPATH_MAX_RDMA_ATOMIC) - next = 0; - spin_lock_irqsave(&qp->s_lock, flags); - /* Double check we can process this while holding the s_lock. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; - if (unlikely(next == qp->s_tail_ack_queue)) { - if (!qp->s_ack_queue[next].sent) - goto nack_inv_unlck; - ipath_update_ack_queue(qp, next); - } - if (!header_in_data) - ateth = &ohdr->u.atomic_eth; - else - ateth = (struct ib_atomic_eth *)data; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); - if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv_unlck; - rkey = be32_to_cpu(ateth->rkey); - /* Check rkey & NAK */ - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, - sizeof(u64), vaddr, rkey, - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc_unlck; - /* Perform atomic OP and save result. */ - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); - e = &qp->s_ack_queue[qp->r_head_ack_queue]; - e->atomic_data = (opcode == OP(FETCH_ADD)) ? - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), - sdata); - e->opcode = opcode; - e->sent = 0; - e->psn = psn & IPATH_PSN_MASK; - qp->r_msn++; - qp->r_psn++; - qp->r_state = opcode; - qp->r_nak_state = 0; - qp->r_head_ack_queue = next; - - /* Schedule the send tasklet. */ - ipath_schedule_send(qp); - - goto unlock; - } - - default: - /* NAK unknown opcodes. */ - goto nack_inv; - } - qp->r_psn++; - qp->r_state = opcode; - qp->r_ack_psn = psn; - qp->r_nak_state = 0; - /* Send an ACK if requested or required. */ - if (psn & (1 << 31)) - goto send_ack; - goto done; - -rnr_nak: - qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - -nack_inv_unlck: - spin_unlock_irqrestore(&qp->s_lock, flags); -nack_inv: - ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - qp->r_nak_state = IB_NAK_INVALID_REQUEST; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - -nack_acc_unlck: - spin_unlock_irqrestore(&qp->s_lock, flags); -nack_acc: - ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); - qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; - qp->r_ack_psn = qp->r_psn; -send_ack: - send_rc_ack(qp); - goto done; - -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); -done: - return; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_registers.h b/kernel/drivers/infiniband/hw/ipath/ipath_registers.h deleted file mode 100644 index 8f44d0cf3..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_registers.h +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _IPATH_REGISTERS_H -#define _IPATH_REGISTERS_H - -/* - * This file should only be included by kernel source, and by the diags. It - * defines the registers, and their contents, for InfiniPath chips. - */ - -/* - * These are the InfiniPath register and buffer bit definitions, - * that are visible to software, and needed only by the kernel - * and diag code. A few, that are visible to protocol and user - * code are in ipath_common.h. Some bits are specific - * to a given chip implementation, and have been moved to the - * chip-specific source file - */ - -/* kr_revision bits */ -#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF -#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0 -#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF -#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8 -#define INFINIPATH_R_ARCH_MASK 0xFF -#define INFINIPATH_R_ARCH_SHIFT 16 -#define INFINIPATH_R_SOFTWARE_MASK 0xFF -#define INFINIPATH_R_SOFTWARE_SHIFT 24 -#define INFINIPATH_R_BOARDID_MASK 0xFF -#define INFINIPATH_R_BOARDID_SHIFT 32 - -/* kr_control bits */ -#define INFINIPATH_C_FREEZEMODE 0x00000002 -#define INFINIPATH_C_LINKENABLE 0x00000004 - -/* kr_sendctrl bits */ -#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16 -#define INFINIPATH_S_UPDTHRESH_SHIFT 24 -#define INFINIPATH_S_UPDTHRESH_MASK 0x1f - -#define IPATH_S_ABORT 0 -#define IPATH_S_PIOINTBUFAVAIL 1 -#define IPATH_S_PIOBUFAVAILUPD 2 -#define IPATH_S_PIOENABLE 3 -#define IPATH_S_SDMAINTENABLE 9 -#define IPATH_S_SDMASINGLEDESCRIPTOR 10 -#define IPATH_S_SDMAENABLE 11 -#define IPATH_S_SDMAHALT 12 -#define IPATH_S_DISARM 31 - -#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT) -#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL) -#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD) -#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE) -#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE) -#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \ - (1U << IPATH_S_SDMASINGLEDESCRIPTOR) -#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE) -#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT) -#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM) - -/* kr_rcvctrl bits that are the same on multiple chips */ -#define INFINIPATH_R_PORTENABLE_SHIFT 0 -#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38) - -/* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL -#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL -#define INFINIPATH_I_ERROR 0x0000000080000000ULL -#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL -#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL -#define INFINIPATH_I_GPIO 0x0000000010000000ULL -#define INFINIPATH_I_JINT 0x0000000004000000ULL - -/* kr_errorstatus, kr_errorclear, kr_errormask bits */ -#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL -#define INFINIPATH_E_RVCRC 0x0000000000000002ULL -#define INFINIPATH_E_RICRC 0x0000000000000004ULL -#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL -#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL -#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL -#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL -#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL -#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL -#define INFINIPATH_E_REBP 0x0000000000000200ULL -#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL -#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL -#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL -#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL -#define INFINIPATH_E_RBADTID 0x0000000000004000ULL -#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL -#define INFINIPATH_E_RHDR 0x0000000000010000ULL -#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL -#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL -#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL -#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL -#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL -#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL -#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL -#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL -#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL -#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL -#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL -#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL -#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL -#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL -#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL -#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL -#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL -#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL -#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL -#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL -#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL -#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL -#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL -#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL -#define INFINIPATH_E_RESET 0x0004000000000000ULL -#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL -#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL -#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL - -/* - * this is used to print "common" packet errors only when the - * __IPATH_ERRPKTDBG bit is set in ipath_debug. - */ -#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \ - | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \ - | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \ - | INFINIPATH_E_REBP ) - -/* Convenience for decoding Send DMA errors */ -#define INFINIPATH_E_SDMAERRS ( \ - INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \ - INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \ - INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \ - INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \ - INFINIPATH_E_SDMAUNEXPDATA | \ - INFINIPATH_E_SDMADESCADDRMISALIGN | \ - INFINIPATH_E_SDMADISABLED | \ - INFINIPATH_E_SENDBUFMISUSE) - -/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ -/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo - * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID - * bit 4: flag buffer, 5: datainfo, 6: header info */ -#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL -#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 -#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL -#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL -/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */ -#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL -#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL -#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL -/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ -#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL -/* waldo specific -- find the rest in ipath_6110.c */ -#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL -/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */ -#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL - -/* kr_hwdiagctrl bits */ -#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL -#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40 -#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL -#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44 -#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL -#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL -#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL -#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL -#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL - -/* kr_ibcctrl bits */ -#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL -#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0 -#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL -#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8 -#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL -#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1 -/* cycle through TS1/TS2 till OK */ -#define INFINIPATH_IBCC_LINKINITCMD_POLL 2 -/* wait for TS1, then go on */ -#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 -#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16 -#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL -#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ -#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ -#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ -#define INFINIPATH_IBCC_LINKCMD_SHIFT 18 -#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL -#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20 -#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL -#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32 -#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL -#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36 -#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL -#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40 -#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL -#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL - -/* kr_ibcstatus bits */ -#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0 -#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7 - -#define INFINIPATH_IBCS_TXREADY 0x40000000 -#define INFINIPATH_IBCS_TXCREDITOK 0x80000000 -/* link training states (shift by - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */ -#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00 -#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01 -#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02 -#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03 -#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04 -#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05 -#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08 -#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09 -#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a -#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b -#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c -#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e -#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f -/* link state machine states (shift by ibcs_ls_shift) */ -#define INFINIPATH_IBCS_L_STATE_DOWN 0x0 -#define INFINIPATH_IBCS_L_STATE_INIT 0x1 -#define INFINIPATH_IBCS_L_STATE_ARM 0x2 -#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3 -#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4 - - -/* kr_extstatus bits */ -#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1 -#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL -#define INFINIPATH_EXTS_GPIOIN_SHIFT 48 - -/* kr_extctrl bits */ -#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL -#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32 -#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL -#define INFINIPATH_EXTC_GPIOOE_SHIFT 48 -#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL -#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL -#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL -#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL -#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL -#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL -#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL -#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL -#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL -#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL -#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL -#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL -#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL - -/* kr_partitionkey bits */ -#define INFINIPATH_PKEY_SIZE 16 -#define INFINIPATH_PKEY_MASK 0xFFFF -#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF - -/* kr_serdesconfig0 bits */ -#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overal reset bits */ -#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */ -/* tx idle enables (per lane) */ -#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL -/* rx detect enables (per lane) */ -#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL -/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */ -#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL - -/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */ -#define INFINIPATH_XGXS_RX_POL_SHIFT 19 -#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL - - -/* - * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our - * PIO send buffers. This is well beyond anything currently - * defined in the InfiniBand spec. - */ -#define IPATH_PIO_MAXIBHDR 128 - -typedef u64 ipath_err_t; - -/* The following change with the type of device, so - * need to be part of the ipath_devdata struct, or - * we could have problems plugging in devices of - * different types (e.g. one HT, one PCIE) - * in one system, to be managed by one driver. - * On the other hand, this file is may also be included - * by other code, so leave the declarations here - * temporarily. Minor footprint issue if common-model - * linker used, none if C89+ linker used. - */ - -/* mask of defined bits for various registers */ -extern u64 infinipath_i_bitsextant; -extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; - -/* masks that are different in various chips, or only exist in some chips */ -extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; - -/* - * These are the infinipath general register numbers (not offsets). - * The kernel registers are used directly, those beyond the kernel - * registers are calculated from one of the base registers. The use of - * an integer type doesn't allow type-checking as thorough as, say, - * an enum but allows for better hiding of chip differences. - */ -typedef const u16 ipath_kreg, /* infinipath general registers */ - ipath_creg, /* infinipath counter registers */ - ipath_sreg; /* kernel-only, infinipath send registers */ - -/* - * These are the chip registers common to all infinipath chips, and - * used both by the kernel and the diagnostics or other user code. - * They are all implemented such that 64 bit accesses work. - * Some implement no more than 32 bits. Because 64 bit reads - * require 2 HT cmds on opteron, we access those with 32 bit - * reads for efficiency (they are written as 64 bits, since - * the extra 32 bits are nearly free on writes, and it slightly reduces - * complexity). The rest are all accessed as 64 bits. - */ -struct ipath_kregs { - /* These are the 32 bit group */ - ipath_kreg kr_control; - ipath_kreg kr_counterregbase; - ipath_kreg kr_intmask; - ipath_kreg kr_intstatus; - ipath_kreg kr_pagealign; - ipath_kreg kr_portcnt; - ipath_kreg kr_rcvtidbase; - ipath_kreg kr_rcvtidcnt; - ipath_kreg kr_rcvegrbase; - ipath_kreg kr_rcvegrcnt; - ipath_kreg kr_scratch; - ipath_kreg kr_sendctrl; - ipath_kreg kr_sendpiobufbase; - ipath_kreg kr_sendpiobufcnt; - ipath_kreg kr_sendpiosize; - ipath_kreg kr_sendregbase; - ipath_kreg kr_userregbase; - /* These are the 64 bit group */ - ipath_kreg kr_debugport; - ipath_kreg kr_debugportselect; - ipath_kreg kr_errorclear; - ipath_kreg kr_errormask; - ipath_kreg kr_errorstatus; - ipath_kreg kr_extctrl; - ipath_kreg kr_extstatus; - ipath_kreg kr_gpio_clear; - ipath_kreg kr_gpio_mask; - ipath_kreg kr_gpio_out; - ipath_kreg kr_gpio_status; - ipath_kreg kr_hwdiagctrl; - ipath_kreg kr_hwerrclear; - ipath_kreg kr_hwerrmask; - ipath_kreg kr_hwerrstatus; - ipath_kreg kr_ibcctrl; - ipath_kreg kr_ibcstatus; - ipath_kreg kr_intblocked; - ipath_kreg kr_intclear; - ipath_kreg kr_interruptconfig; - ipath_kreg kr_mdio; - ipath_kreg kr_partitionkey; - ipath_kreg kr_rcvbthqp; - ipath_kreg kr_rcvbufbase; - ipath_kreg kr_rcvbufsize; - ipath_kreg kr_rcvctrl; - ipath_kreg kr_rcvhdrcnt; - ipath_kreg kr_rcvhdrentsize; - ipath_kreg kr_rcvhdrsize; - ipath_kreg kr_rcvintmembase; - ipath_kreg kr_rcvintmemsize; - ipath_kreg kr_revision; - ipath_kreg kr_sendbuffererror; - ipath_kreg kr_sendpioavailaddr; - ipath_kreg kr_serdesconfig0; - ipath_kreg kr_serdesconfig1; - ipath_kreg kr_serdesstatus; - ipath_kreg kr_txintmembase; - ipath_kreg kr_txintmemsize; - ipath_kreg kr_xgxsconfig; - ipath_kreg kr_ibpllcfg; - /* use these two (and the following N ports) only with - * ipath_k*_kreg64_port(); not *kreg64() */ - ipath_kreg kr_rcvhdraddr; - ipath_kreg kr_rcvhdrtailaddr; - - /* remaining registers are not present on all types of infinipath - chips */ - ipath_kreg kr_rcvpktledcnt; - ipath_kreg kr_pcierbuftestreg0; - ipath_kreg kr_pcierbuftestreg1; - ipath_kreg kr_pcieq0serdesconfig0; - ipath_kreg kr_pcieq0serdesconfig1; - ipath_kreg kr_pcieq0serdesstatus; - ipath_kreg kr_pcieq1serdesconfig0; - ipath_kreg kr_pcieq1serdesconfig1; - ipath_kreg kr_pcieq1serdesstatus; - ipath_kreg kr_hrtbt_guid; - ipath_kreg kr_ibcddrctrl; - ipath_kreg kr_ibcddrstatus; - ipath_kreg kr_jintreload; - - /* send dma related regs */ - ipath_kreg kr_senddmabase; - ipath_kreg kr_senddmalengen; - ipath_kreg kr_senddmatail; - ipath_kreg kr_senddmahead; - ipath_kreg kr_senddmaheadaddr; - ipath_kreg kr_senddmabufmask0; - ipath_kreg kr_senddmabufmask1; - ipath_kreg kr_senddmabufmask2; - ipath_kreg kr_senddmastatus; - - /* SerDes related regs (IBA7220-only) */ - ipath_kreg kr_ibserdesctrl; - ipath_kreg kr_ib_epbacc; - ipath_kreg kr_ib_epbtrans; - ipath_kreg kr_pcie_epbacc; - ipath_kreg kr_pcie_epbtrans; - ipath_kreg kr_ib_ddsrxeq; -}; - -struct ipath_cregs { - ipath_creg cr_badformatcnt; - ipath_creg cr_erricrccnt; - ipath_creg cr_errlinkcnt; - ipath_creg cr_errlpcrccnt; - ipath_creg cr_errpkey; - ipath_creg cr_errrcvflowctrlcnt; - ipath_creg cr_err_rlencnt; - ipath_creg cr_errslencnt; - ipath_creg cr_errtidfull; - ipath_creg cr_errtidvalid; - ipath_creg cr_errvcrccnt; - ipath_creg cr_ibstatuschange; - ipath_creg cr_intcnt; - ipath_creg cr_invalidrlencnt; - ipath_creg cr_invalidslencnt; - ipath_creg cr_lbflowstallcnt; - ipath_creg cr_iblinkdowncnt; - ipath_creg cr_iblinkerrrecovcnt; - ipath_creg cr_ibsymbolerrcnt; - ipath_creg cr_pktrcvcnt; - ipath_creg cr_pktrcvflowctrlcnt; - ipath_creg cr_pktsendcnt; - ipath_creg cr_pktsendflowcnt; - ipath_creg cr_portovflcnt; - ipath_creg cr_rcvebpcnt; - ipath_creg cr_rcvovflcnt; - ipath_creg cr_rxdroppktcnt; - ipath_creg cr_senddropped; - ipath_creg cr_sendstallcnt; - ipath_creg cr_sendunderruncnt; - ipath_creg cr_unsupvlcnt; - ipath_creg cr_wordrcvcnt; - ipath_creg cr_wordsendcnt; - ipath_creg cr_vl15droppedpktcnt; - ipath_creg cr_rxotherlocalphyerrcnt; - ipath_creg cr_excessbufferovflcnt; - ipath_creg cr_locallinkintegrityerrcnt; - ipath_creg cr_rxvlerrcnt; - ipath_creg cr_rxdlidfltrcnt; - ipath_creg cr_psstat; - ipath_creg cr_psstart; - ipath_creg cr_psinterval; - ipath_creg cr_psrcvdatacount; - ipath_creg cr_psrcvpktscount; - ipath_creg cr_psxmitdatacount; - ipath_creg cr_psxmitpktscount; - ipath_creg cr_psxmitwaitcount; -}; - -#endif /* _IPATH_REGISTERS_H */ diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_ruc.c b/kernel/drivers/infiniband/hw/ipath/ipath_ruc.c deleted file mode 100644 index 1f95bbaf7..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_ruc.c +++ /dev/null @@ -1,734 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/sched.h> -#include <linux/spinlock.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/* - * Convert the AETH RNR timeout code into the number of milliseconds. - */ -const u32 ib_ipath_rnr_table[32] = { - 656, /* 0 */ - 1, /* 1 */ - 1, /* 2 */ - 1, /* 3 */ - 1, /* 4 */ - 1, /* 5 */ - 1, /* 6 */ - 1, /* 7 */ - 1, /* 8 */ - 1, /* 9 */ - 1, /* A */ - 1, /* B */ - 1, /* C */ - 1, /* D */ - 2, /* E */ - 2, /* F */ - 3, /* 10 */ - 4, /* 11 */ - 6, /* 12 */ - 8, /* 13 */ - 11, /* 14 */ - 16, /* 15 */ - 21, /* 16 */ - 31, /* 17 */ - 41, /* 18 */ - 62, /* 19 */ - 82, /* 1A */ - 123, /* 1B */ - 164, /* 1C */ - 246, /* 1D */ - 328, /* 1E */ - 492 /* 1F */ -}; - -/** - * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device - * @qp: the QP - * - * Called with the QP s_lock held and interrupts disabled. - * XXX Use a simple list for now. We might need a priority - * queue if we have lots of QPs waiting for RNR timeouts - * but that should be rare. - */ -void ipath_insert_rnr_queue(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - - /* We already did a spin_lock_irqsave(), so just use spin_lock */ - spin_lock(&dev->pending_lock); - if (list_empty(&dev->rnrwait)) - list_add(&qp->timerwait, &dev->rnrwait); - else { - struct list_head *l = &dev->rnrwait; - struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp, - timerwait); - - while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { - qp->s_rnr_timeout -= nqp->s_rnr_timeout; - l = l->next; - if (l->next == &dev->rnrwait) { - nqp = NULL; - break; - } - nqp = list_entry(l->next, struct ipath_qp, - timerwait); - } - if (nqp) - nqp->s_rnr_timeout -= qp->s_rnr_timeout; - list_add(&qp->timerwait, l); - } - spin_unlock(&dev->pending_lock); -} - -/** - * ipath_init_sge - Validate a RWQE and fill in the SGE state - * @qp: the QP - * - * Return 1 if OK. - */ -int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, - u32 *lengthp, struct ipath_sge_state *ss) -{ - int i, j, ret; - struct ib_wc wc; - - *lengthp = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) - goto bad_lkey; - *lengthp += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ret = 1; - goto bail; - -bad_lkey: - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * ipath_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return 0 if no RWQE is available, otherwise return 1. - * - * Can be called from interrupt level. - */ -int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct ipath_rq *rq; - struct ipath_rwq *wq; - struct ipath_srq *srq; - struct ipath_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - do { - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. */ - smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); - if (++tail >= rq->size) - tail = 0; - if (wr_id_only) - break; - qp->r_sge.sg_list = qp->r_sg_list; - } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge)); - qp->r_wr_id = wqe->wr_id; - wq->tail = tail; - - ret = 1; - set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - -/** - * ipath_ruc_loopback - handle UC and RC lookback requests - * @sqp: the sending QP - * - * This is called from ipath_do_send() to - * forward a WQE addressed to the same HCA. - * Note that although we are single threaded due to the tasklet, we still - * have to protect against post_send(). We don't have to worry about - * receive interrupts since this is a connected protocol and all packets - * will pass through here. - */ -static void ipath_ruc_loopback(struct ipath_qp *sqp) -{ - struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); - struct ipath_qp *qp; - struct ipath_swqe *wqe; - struct ipath_sge *sge; - unsigned long flags; - struct ib_wc wc; - u64 sdata; - atomic64_t *maddr; - enum ib_wc_status send_status; - - /* - * Note that we check the responder QP state after - * checking the requester's state. - */ - qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - - spin_lock_irqsave(&sqp->s_lock, flags); - - /* Return if we are already busy processing a work request. */ - if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || - !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) - goto unlock; - - sqp->s_flags |= IPATH_S_BUSY; - -again: - if (sqp->s_last == sqp->s_head) - goto clr_busy; - wqe = get_swqe_ptr(sqp, sqp->s_last); - - /* Return if it is not OK to start a new work reqeust. */ - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { - if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) - goto clr_busy; - /* We are in the error state, flush the work request. */ - send_status = IB_WC_WR_FLUSH_ERR; - goto flush_send; - } - - /* - * We can rely on the entry not changing without the s_lock - * being held until we update s_last. - * We increment s_cur to indicate s_last is in progress. - */ - if (sqp->s_last == sqp->s_cur) { - if (++sqp->s_cur >= sqp->s_size) - sqp->s_cur = 0; - } - spin_unlock_irqrestore(&sqp->s_lock, flags); - - if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - dev->n_pkt_drops++; - /* - * For RC, the requester would timeout and retry so - * shortcut the timeouts and just signal too many retries. - */ - if (sqp->ibqp.qp_type == IB_QPT_RC) - send_status = IB_WC_RETRY_EXC_ERR; - else - send_status = IB_WC_SUCCESS; - goto serr; - } - - memset(&wc, 0, sizeof wc); - send_status = IB_WC_SUCCESS; - - sqp->s_sge.sge = wqe->sg_list[0]; - sqp->s_sge.sg_list = wqe->sg_list + 1; - sqp->s_sge.num_sge = wqe->wr.num_sge; - sqp->s_len = wqe->length; - switch (wqe->wr.opcode) { - case IB_WR_SEND_WITH_IMM: - wc.wc_flags = IB_WC_WITH_IMM; - wc.ex.imm_data = wqe->wr.ex.imm_data; - /* FALLTHROUGH */ - case IB_WR_SEND: - if (!ipath_get_rwqe(qp, 0)) - goto rnr_nak; - break; - - case IB_WR_RDMA_WRITE_WITH_IMM: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) - goto inv_err; - wc.wc_flags = IB_WC_WITH_IMM; - wc.ex.imm_data = wqe->wr.ex.imm_data; - if (!ipath_get_rwqe(qp, 1)) - goto rnr_nak; - /* FALLTHROUGH */ - case IB_WR_RDMA_WRITE: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) - goto inv_err; - if (wqe->length == 0) - break; - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_WRITE))) - goto acc_err; - break; - - case IB_WR_RDMA_READ: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) - goto inv_err; - if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_READ))) - goto acc_err; - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->wr.num_sge; - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) - goto inv_err; - if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, - IB_ACCESS_REMOTE_ATOMIC))) - goto acc_err; - /* Perform atomic OP and save result. */ - maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; - *(u64 *) sqp->s_sge.sge.vaddr = - (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); - goto send_comp; - - default: - send_status = IB_WC_LOC_QP_OP_ERR; - goto serr; - } - - sge = &sqp->s_sge.sge; - while (sqp->s_len) { - u32 len = sqp->s_len; - - if (len > sge->length) - len = sge->length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - ipath_copy_sge(&qp->r_sge, sge->vaddr, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--sqp->s_sge.num_sge) - *sge = *sqp->s_sge.sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - sqp->s_len -= len; - } - - if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) - goto send_comp; - - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - else - wc.opcode = IB_WC_RECV; - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.port_num = 1; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); - -send_comp: - spin_lock_irqsave(&sqp->s_lock, flags); -flush_send: - sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - ipath_send_complete(sqp, wqe, send_status); - goto again; - -rnr_nak: - /* Handle RNR NAK */ - if (qp->ibqp.qp_type == IB_QPT_UC) - goto send_comp; - /* - * Note: we don't need the s_lock held since the BUSY flag - * makes this single threaded. - */ - if (sqp->s_rnr_retry == 0) { - send_status = IB_WC_RNR_RETRY_EXC_ERR; - goto serr; - } - if (sqp->s_rnr_retry_cnt < 7) - sqp->s_rnr_retry--; - spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) - goto clr_busy; - sqp->s_flags |= IPATH_S_WAITING; - dev->n_rnr_naks++; - sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; - ipath_insert_rnr_queue(sqp); - goto clr_busy; - -inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; - wc.status = IB_WC_LOC_QP_OP_ERR; - goto err; - -acc_err: - send_status = IB_WC_REM_ACCESS_ERR; - wc.status = IB_WC_LOC_PROT_ERR; -err: - /* responder goes to error state */ - ipath_rc_error(qp, wc.status); - -serr: - spin_lock_irqsave(&sqp->s_lock, flags); - ipath_send_complete(sqp, wqe, send_status); - if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); - - sqp->s_flags &= ~IPATH_S_BUSY; - spin_unlock_irqrestore(&sqp->s_lock, flags); - if (lastwqe) { - struct ib_event ev; - - ev.device = sqp->ibqp.device; - ev.element.qp = &sqp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); - } - goto done; - } -clr_busy: - sqp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&sqp->s_lock, flags); -done: - if (qp && atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -} - -static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) -{ - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || - qp->ibqp.qp_type == IB_QPT_SMI) { - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } -} - -/** - * ipath_no_bufs_available - tell the layer driver we need buffers - * @qp: the QP that caused the problem - * @dev: the device we ran out of buffers on - * - * Called when we run out of PIO buffers. - * If we are now in the error state, return zero to flush the - * send work request. - */ -static int ipath_no_bufs_available(struct ipath_qp *qp, - struct ipath_ibdev *dev) -{ - unsigned long flags; - int ret = 1; - - /* - * Note that as soon as want_buffer() is called and - * possibly before it returns, ipath_ib_piobufavail() - * could be called. Therefore, put QP on the piowait list before - * enabling the PIO avail interrupt. - */ - spin_lock_irqsave(&qp->s_lock, flags); - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { - dev->n_piowait++; - qp->s_flags |= IPATH_S_WAITING; - qp->s_flags &= ~IPATH_S_BUSY; - spin_lock(&dev->pending_lock); - if (list_empty(&qp->piowait)) - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock(&dev->pending_lock); - } else - ret = 0; - spin_unlock_irqrestore(&qp->s_lock, flags); - if (ret) - want_buffer(dev->dd, qp); - return ret; -} - -/** - * ipath_make_grh - construct a GRH header - * @dev: a pointer to the ipath device - * @hdr: a pointer to the GRH header being constructed - * @grh: the global route address to send to - * @hwords: the number of 32 bit words of header being sent - * @nwords: the number of 32 bit words of data being sent - * - * Return the size of the header in 32 bit words. - */ -u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords) -{ - hdr->version_tclass_flow = - cpu_to_be32((6 << 28) | - (grh->traffic_class << 20) | - grh->flow_label); - hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); - /* next_hdr is defined by C8-7 in ch. 8.4.1 */ - hdr->next_hdr = 0x1B; - hdr->hop_limit = grh->hop_limit; - /* The SGID is 32-bit aligned. */ - hdr->sgid.global.subnet_prefix = dev->gid_prefix; - hdr->sgid.global.interface_id = dev->dd->ipath_guid; - hdr->dgid = grh->dgid; - - /* GRH header size in 32-bit words. */ - return sizeof(struct ib_grh) / sizeof(u32); -} - -void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 bth0, u32 bth2) -{ - u16 lrh0; - u32 nwords; - u32 extra_bytes; - - /* Construct the header. */ - extra_bytes = -qp->s_cur_size & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; - lrh0 = IPATH_LRH_BTH; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); - lrh0 = IPATH_LRH_GRH; - } - lrh0 |= qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | - qp->remote_ah_attr.src_path_bits); - bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); - bth0 |= extra_bytes << 20; - ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[2] = cpu_to_be32(bth2); -} - -/** - * ipath_do_send - perform a send on a QP - * @data: contains a pointer to the QP - * - * Process entries in the send work queue until credit or queue is - * exhausted. Only allow one CPU to send a packet per QP (tasklet). - * Otherwise, two threads could send packets out of order. - */ -void ipath_do_send(unsigned long data) -{ - struct ipath_qp *qp = (struct ipath_qp *)data; - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - int (*make_req)(struct ipath_qp *qp); - unsigned long flags; - - if ((qp->ibqp.qp_type == IB_QPT_RC || - qp->ibqp.qp_type == IB_QPT_UC) && - qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { - ipath_ruc_loopback(qp); - goto bail; - } - - if (qp->ibqp.qp_type == IB_QPT_RC) - make_req = ipath_make_rc_req; - else if (qp->ibqp.qp_type == IB_QPT_UC) - make_req = ipath_make_uc_req; - else - make_req = ipath_make_ud_req; - - spin_lock_irqsave(&qp->s_lock, flags); - - /* Return if we are already busy processing a work request. */ - if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || - !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { - spin_unlock_irqrestore(&qp->s_lock, flags); - goto bail; - } - - qp->s_flags |= IPATH_S_BUSY; - - spin_unlock_irqrestore(&qp->s_lock, flags); - -again: - /* Check for a constructed packet to be sent. */ - if (qp->s_hdrwords != 0) { - /* - * If no PIO bufs are available, return. An interrupt will - * call ipath_ib_piobufavail() when one is available. - */ - if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, - qp->s_cur_sge, qp->s_cur_size)) { - if (ipath_no_bufs_available(qp, dev)) - goto bail; - } - dev->n_unicast_xmit++; - /* Record that we sent the packet and s_hdr is empty. */ - qp->s_hdrwords = 0; - } - - if (make_req(qp)) - goto again; - -bail:; -} - -/* - * This should be called with s_lock held. - */ -void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, - enum ib_wc_status status) -{ - u32 old_last, last; - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) - return; - - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof wc); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } - - old_last = last = qp->s_last; - if (++last >= qp->s_size) - last = 0; - qp->s_last = last; - if (qp->s_cur == old_last) - qp->s_cur = last; - if (qp->s_tail == old_last) - qp->s_tail = last; - if (qp->state == IB_QPS_SQD && last == qp->s_cur) - qp->s_draining = 0; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_sdma.c b/kernel/drivers/infiniband/hw/ipath/ipath_sdma.c deleted file mode 100644 index 17a517766..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_sdma.c +++ /dev/null @@ -1,818 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/spinlock.h> -#include <linux/gfp.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */ - -static void vl15_watchdog_enq(struct ipath_devdata *dd) -{ - /* ipath_sdma_lock must already be held */ - if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) { - unsigned long interval = (HZ + 19) / 20; - dd->ipath_sdma_vl15_timer.expires = jiffies + interval; - add_timer(&dd->ipath_sdma_vl15_timer); - } -} - -static void vl15_watchdog_deq(struct ipath_devdata *dd) -{ - /* ipath_sdma_lock must already be held */ - if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) { - unsigned long interval = (HZ + 19) / 20; - mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval); - } else { - del_timer(&dd->ipath_sdma_vl15_timer); - } -} - -static void vl15_watchdog_timeout(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) { - ipath_dbg("vl15 watchdog timeout - clearing\n"); - ipath_cancel_sends(dd, 1); - ipath_hol_down(dd); - } else { - ipath_dbg("vl15 watchdog timeout - " - "condition already cleared\n"); - } -} - -static void unmap_desc(struct ipath_devdata *dd, unsigned head) -{ - __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0]; - u64 desc[2]; - dma_addr_t addr; - size_t len; - - desc[0] = le64_to_cpu(descqp[0]); - desc[1] = le64_to_cpu(descqp[1]); - - addr = (desc[1] << 32) | (desc[0] >> 32); - len = (desc[0] >> 14) & (0x7ffULL << 2); - dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE); -} - -/* - * ipath_sdma_lock should be locked before calling this. - */ -int ipath_sdma_make_progress(struct ipath_devdata *dd) -{ - struct list_head *lp = NULL; - struct ipath_sdma_txreq *txp = NULL; - u16 dmahead; - u16 start_idx = 0; - int progress = 0; - - if (!list_empty(&dd->ipath_sdma_activelist)) { - lp = dd->ipath_sdma_activelist.next; - txp = list_entry(lp, struct ipath_sdma_txreq, list); - start_idx = txp->start_idx; - } - - /* - * Read the SDMA head register in order to know that the - * interrupt clear has been written to the chip. - * Otherwise, we may not get an interrupt for the last - * descriptor in the queue. - */ - dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead); - /* sanity check return value for error handling (chip reset, etc.) */ - if (dmahead >= dd->ipath_sdma_descq_cnt) - goto done; - - while (dd->ipath_sdma_descq_head != dmahead) { - if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC && - dd->ipath_sdma_descq_head == start_idx) { - unmap_desc(dd, dd->ipath_sdma_descq_head); - start_idx++; - if (start_idx == dd->ipath_sdma_descq_cnt) - start_idx = 0; - } - - /* increment free count and head */ - dd->ipath_sdma_descq_removed++; - if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt) - dd->ipath_sdma_descq_head = 0; - - if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) { - /* move to notify list */ - if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_deq(dd); - list_move_tail(lp, &dd->ipath_sdma_notifylist); - if (!list_empty(&dd->ipath_sdma_activelist)) { - lp = dd->ipath_sdma_activelist.next; - txp = list_entry(lp, struct ipath_sdma_txreq, - list); - start_idx = txp->start_idx; - } else { - lp = NULL; - txp = NULL; - } - } - progress = 1; - } - - if (progress) - tasklet_hi_schedule(&dd->ipath_sdma_notify_task); - -done: - return progress; -} - -static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list) -{ - struct ipath_sdma_txreq *txp, *txp_next; - - list_for_each_entry_safe(txp, txp_next, list, list) { - list_del_init(&txp->list); - - if (txp->callback) - (*txp->callback)(txp->callback_cookie, - txp->callback_status); - } -} - -static void sdma_notify_taskbody(struct ipath_devdata *dd) -{ - unsigned long flags; - struct list_head list; - - INIT_LIST_HEAD(&list); - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - list_splice_init(&dd->ipath_sdma_notifylist, &list); - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - ipath_sdma_notify(dd, &list); - - /* - * The IB verbs layer needs to see the callback before getting - * the call to ipath_ib_piobufavail() because the callback - * handles releasing resources the next send will need. - * Otherwise, we could do these calls in - * ipath_sdma_make_progress(). - */ - ipath_ib_piobufavail(dd->verbs_dev); -} - -static void sdma_notify_task(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - sdma_notify_taskbody(dd); -} - -static void dump_sdma_state(struct ipath_devdata *dd) -{ - unsigned long reg; - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); - ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl); - ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0); - ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1); - ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2); - ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); - ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg); - - reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); - ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg); -} - -static void sdma_abort_task(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - u64 status; - unsigned long flags; - - if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - return; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK; - - /* nothing to do */ - if (status == IPATH_SDMA_ABORT_NONE) - goto unlock; - - /* ipath_sdma_abort() is done, waiting for interrupt */ - if (status == IPATH_SDMA_ABORT_DISARMED) { - if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout)) - goto resched_noprint; - /* give up, intr got lost somewhere */ - ipath_dbg("give up waiting for SDMADISABLED intr\n"); - __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - status = IPATH_SDMA_ABORT_ABORTED; - } - - /* everything is stopped, time to clean up and restart */ - if (status == IPATH_SDMA_ABORT_ABORTED) { - struct ipath_sdma_txreq *txp, *txpnext; - u64 hwstatus; - int notify = 0; - - hwstatus = ipath_read_kreg64(dd, - dd->ipath_kregs->kr_senddmastatus); - - if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | - IPATH_SDMA_STATUS_ABORT_IN_PROG | - IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || - !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { - if (dd->ipath_sdma_reset_wait > 0) { - /* not done shutting down sdma */ - --dd->ipath_sdma_reset_wait; - goto resched; - } - ipath_cdbg(VERBOSE, "gave up waiting for quiescent " - "status after SDMA reset, continuing\n"); - dump_sdma_state(dd); - } - - /* dequeue all "sent" requests */ - list_for_each_entry_safe(txp, txpnext, - &dd->ipath_sdma_activelist, list) { - txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED; - if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_deq(dd); - list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); - notify = 1; - } - if (notify) - tasklet_hi_schedule(&dd->ipath_sdma_notify_task); - - /* reset our notion of head and tail */ - dd->ipath_sdma_descq_tail = 0; - dd->ipath_sdma_descq_head = 0; - dd->ipath_sdma_head_dma[0] = 0; - dd->ipath_sdma_generation = 0; - dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added; - - /* Reset SendDmaLenGen */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, - (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18)); - - /* done with sdma state for a bit */ - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - /* - * Don't restart sdma here (with the exception - * below). Wait until link is up to ACTIVE. VL15 MADs - * used to bring the link up use PIO, and multiple link - * transitions otherwise cause the sdma engine to be - * stopped and started multiple times. - * The disable is done here, including the shadow, - * so the state is kept consistent. - * See ipath_restart_sdma() for the actual starting - * of sdma. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* make sure I see next message */ - dd->ipath_sdma_abort_jiffies = 0; - - /* - * Not everything that takes SDMA offline is a link - * status change. If the link was up, restart SDMA. - */ - if (dd->ipath_flags & IPATH_LINKACTIVE) - ipath_restart_sdma(dd); - - goto done; - } - -resched: - /* - * for now, keep spinning - * JAG - this is bad to just have default be a loop without - * state change - */ - if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) { - ipath_dbg("looping with status 0x%08lx\n", - dd->ipath_sdma_status); - dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; - } -resched_noprint: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - tasklet_hi_schedule(&dd->ipath_sdma_abort_task); - return; - -unlock: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); -done: - return; -} - -/* - * This is called from interrupt context. - */ -void ipath_sdma_intr(struct ipath_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - (void) ipath_sdma_make_progress(dd); - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); -} - -static int alloc_sdma(struct ipath_devdata *dd) -{ - int ret = 0; - - /* Allocate memory for SendDMA descriptor FIFO */ - dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev, - SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL); - - if (!dd->ipath_sdma_descq) { - ipath_dev_err(dd, "failed to allocate SendDMA descriptor " - "FIFO memory\n"); - ret = -ENOMEM; - goto done; - } - - dd->ipath_sdma_descq_cnt = - SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc); - - /* Allocate memory for DMA of head register to memory */ - dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev, - PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL); - if (!dd->ipath_sdma_head_dma) { - ipath_dev_err(dd, "failed to allocate SendDMA head memory\n"); - ret = -ENOMEM; - goto cleanup_descq; - } - dd->ipath_sdma_head_dma[0] = 0; - - init_timer(&dd->ipath_sdma_vl15_timer); - dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout; - dd->ipath_sdma_vl15_timer.data = (unsigned long)dd; - atomic_set(&dd->ipath_sdma_vl15_count, 0); - - goto done; - -cleanup_descq: - dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ, - (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys); - dd->ipath_sdma_descq = NULL; - dd->ipath_sdma_descq_phys = 0; -done: - return ret; -} - -int setup_sdma(struct ipath_devdata *dd) -{ - int ret = 0; - unsigned i, n; - u64 tmp64; - u64 senddmabufmask[3] = { 0 }; - unsigned long flags; - - ret = alloc_sdma(dd); - if (ret) - goto done; - - if (!dd->ipath_sdma_descq) { - ipath_dev_err(dd, "SendDMA memory not allocated\n"); - goto done; - } - - /* - * Set initial status as if we had been up, then gone down. - * This lets initial start on transition to ACTIVE be the - * same as restart after link flap. - */ - dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED; - dd->ipath_sdma_abort_jiffies = 0; - dd->ipath_sdma_generation = 0; - dd->ipath_sdma_descq_tail = 0; - dd->ipath_sdma_descq_head = 0; - dd->ipath_sdma_descq_removed = 0; - dd->ipath_sdma_descq_added = 0; - - /* Set SendDmaBase */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, - dd->ipath_sdma_descq_phys); - /* Set SendDmaLenGen */ - tmp64 = dd->ipath_sdma_descq_cnt; - tmp64 |= 1<<18; /* enable generation checking */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64); - /* Set SendDmaTail */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, - dd->ipath_sdma_descq_tail); - /* Set SendDmaHeadAddr */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, - dd->ipath_sdma_head_phys); - - /* - * Reserve all the former "kernel" piobufs, using high number range - * so we get as many 4K buffers as possible - */ - n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; - i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved; - ipath_chg_pioavailkernel(dd, i, n - i , 0); - for (; i < n; ++i) { - unsigned word = i / 64; - unsigned bit = i & 63; - BUG_ON(word >= 3); - senddmabufmask[word] |= 1ULL << bit; - } - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, - senddmabufmask[0]); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, - senddmabufmask[1]); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, - senddmabufmask[2]); - - INIT_LIST_HEAD(&dd->ipath_sdma_activelist); - INIT_LIST_HEAD(&dd->ipath_sdma_notifylist); - - tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task, - (unsigned long) dd); - tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task, - (unsigned long) dd); - - /* - * No use to turn on SDMA here, as link is probably not ACTIVE - * Just mark it RUNNING and enable the interrupt, and let the - * ipath_restart_sdma() on link transition to ACTIVE actually - * enable it. - */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - -done: - return ret; -} - -void teardown_sdma(struct ipath_devdata *dd) -{ - struct ipath_sdma_txreq *txp, *txpnext; - unsigned long flags; - dma_addr_t sdma_head_phys = 0; - dma_addr_t sdma_descq_phys = 0; - void *sdma_descq = NULL; - void *sdma_head_dma = NULL; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); - __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - tasklet_kill(&dd->ipath_sdma_abort_task); - tasklet_kill(&dd->ipath_sdma_notify_task); - - /* turn off sdma */ - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - /* dequeue all "sent" requests */ - list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist, - list) { - txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN; - if (txp->flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_deq(dd); - list_move_tail(&txp->list, &dd->ipath_sdma_notifylist); - } - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - sdma_notify_taskbody(dd); - - del_timer_sync(&dd->ipath_sdma_vl15_timer); - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - dd->ipath_sdma_abort_jiffies = 0; - - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0); - - if (dd->ipath_sdma_head_dma) { - sdma_head_dma = (void *) dd->ipath_sdma_head_dma; - sdma_head_phys = dd->ipath_sdma_head_phys; - dd->ipath_sdma_head_dma = NULL; - dd->ipath_sdma_head_phys = 0; - } - - if (dd->ipath_sdma_descq) { - sdma_descq = dd->ipath_sdma_descq; - sdma_descq_phys = dd->ipath_sdma_descq_phys; - dd->ipath_sdma_descq = NULL; - dd->ipath_sdma_descq_phys = 0; - } - - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - if (sdma_head_dma) - dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - sdma_head_dma, sdma_head_phys); - - if (sdma_descq) - dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ, - sdma_descq, sdma_descq_phys); -} - -/* - * [Re]start SDMA, if we use it, and it's not already OK. - * This is called on transition to link ACTIVE, either the first or - * subsequent times. - */ -void ipath_restart_sdma(struct ipath_devdata *dd) -{ - unsigned long flags; - int needed = 1; - - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) - goto bail; - - /* - * First, make sure we should, which is to say, - * check that we are "RUNNING" (not in teardown) - * and not "SHUTDOWN" - */ - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status) - || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) - needed = 0; - else { - __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - } - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - if (!needed) { - ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", - dd->ipath_sdma_status); - goto bail; - } - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - /* - * First clear, just to be safe. Enable is only done - * in chip on 0->1 transition - */ - dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - - /* notify upper layers */ - ipath_ib_piobufavail(dd->verbs_dev); - -bail: - return; -} - -static inline void make_sdma_desc(struct ipath_devdata *dd, - u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset) -{ - WARN_ON(addr & 3); - /* SDmaPhyAddr[47:32] */ - sdmadesc[1] = addr >> 32; - /* SDmaPhyAddr[31:0] */ - sdmadesc[0] = (addr & 0xfffffffcULL) << 32; - /* SDmaGeneration[1:0] */ - sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30; - /* SDmaDwordCount[10:0] */ - sdmadesc[0] |= (dwlen & 0x7ffULL) << 16; - /* SDmaBufOffset[12:2] */ - sdmadesc[0] |= dwoffset & 0x7ffULL; -} - -/* - * This function queues one IB packet onto the send DMA queue per call. - * The caller is responsible for checking: - * 1) The number of send DMA descriptor entries is less than the size of - * the descriptor queue. - * 2) The IB SGE addresses and lengths are 32-bit aligned - * (except possibly the last SGE's length) - * 3) The SGE addresses are suitable for passing to dma_map_single(). - */ -int ipath_sdma_verbs_send(struct ipath_devdata *dd, - struct ipath_sge_state *ss, u32 dwords, - struct ipath_verbs_txreq *tx) -{ - - unsigned long flags; - struct ipath_sge *sge; - int ret = 0; - u16 tail; - __le64 *descqp; - u64 sdmadesc[2]; - u32 dwoffset; - dma_addr_t addr; - - if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) { - ipath_dbg("packet size %X > ibmax %X, fail\n", - tx->map_len + (dwords<<2), dd->ipath_ibmaxlen); - ret = -EMSGSIZE; - goto fail; - } - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - -retry: - if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) { - ret = -EBUSY; - goto unlock; - } - - if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) { - if (ipath_sdma_make_progress(dd)) - goto retry; - ret = -ENOBUFS; - goto unlock; - } - - addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, - tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, addr)) - goto ioerr; - - dwoffset = tx->map_len >> 2; - make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0); - - /* SDmaFirstDesc */ - sdmadesc[0] |= 1ULL << 12; - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) - sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */ - - /* write to the descq */ - tail = dd->ipath_sdma_descq_tail; - descqp = &dd->ipath_sdma_descq[tail].qw[0]; - *descqp++ = cpu_to_le64(sdmadesc[0]); - *descqp++ = cpu_to_le64(sdmadesc[1]); - - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC) - tx->txreq.start_idx = tail; - - /* increment the tail */ - if (++tail == dd->ipath_sdma_descq_cnt) { - tail = 0; - descqp = &dd->ipath_sdma_descq[0].qw[0]; - ++dd->ipath_sdma_generation; - } - - sge = &ss->sge; - while (dwords) { - u32 dw; - u32 len; - - len = dwords << 2; - if (len > sge->length) - len = sge->length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - dw = (len + 3) >> 2; - addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, addr)) - goto unmap; - make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); - /* SDmaUseLargeBuf has to be set in every descriptor */ - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) - sdmadesc[0] |= 1ULL << 14; - /* write to the descq */ - *descqp++ = cpu_to_le64(sdmadesc[0]); - *descqp++ = cpu_to_le64(sdmadesc[1]); - - /* increment the tail */ - if (++tail == dd->ipath_sdma_descq_cnt) { - tail = 0; - descqp = &dd->ipath_sdma_descq[0].qw[0]; - ++dd->ipath_sdma_generation; - } - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - - dwoffset += dw; - dwords -= dw; - } - - if (!tail) - descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0]; - descqp -= 2; - /* SDmaLastDesc */ - descqp[0] |= cpu_to_le64(1ULL << 11); - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) { - /* SDmaIntReq */ - descqp[0] |= cpu_to_le64(1ULL << 15); - } - - /* Commit writes to memory and advance the tail on the chip */ - wmb(); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); - - tx->txreq.next_descq_idx = tail; - tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK; - dd->ipath_sdma_descq_tail = tail; - dd->ipath_sdma_descq_added += tx->txreq.sg_count; - list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist); - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) - vl15_watchdog_enq(dd); - goto unlock; - -unmap: - while (tail != dd->ipath_sdma_descq_tail) { - if (!tail) - tail = dd->ipath_sdma_descq_cnt - 1; - else - tail--; - unmap_desc(dd, tail); - } -ioerr: - ret = -EIO; -unlock: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); -fail: - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_srq.c b/kernel/drivers/infiniband/hw/ipath/ipath_srq.c deleted file mode 100644 index 26271984b..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_srq.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "ipath_verbs.h" - -/** - * ipath_post_srq_receive - post a receive on a shared receive queue - * @ibsrq: the SRQ to post the receive on - * @wr: the list of work requests to post - * @bad_wr: the first WR to cause a problem is put here - * - * This may be called from interrupt context. - */ -int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_rwq *wq; - unsigned long flags; - int ret; - - for (; wr; wr = wr->next) { - struct ipath_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > srq->rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; - next = wq->head + 1; - if (next >= srq->rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = get_rwqe_ptr(&srq->rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** - * ipath_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create - * @srq_init_attr: the attributes of the SRQ - * @udata: data from libipathverbs when creating a user SRQ - */ -struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibpd->device); - struct ipath_srq *srq; - u32 sz; - struct ib_srq *ret; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - ret = ERR_PTR(-ENOSYS); - goto done; - } - - if (srq_init_attr->attr.max_wr == 0) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || - (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { - ret = ERR_PTR(-EINVAL); - goto done; - } - - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - ret = ERR_PTR(-ENOMEM); - goto done; - } - - /* - * Need to use vmalloc() if we want to support large #s of entries. - */ - srq->rq.size = srq_init_attr->attr.max_wr + 1; - srq->rq.max_sge = srq_init_attr->attr.max_sge; - sz = sizeof(struct ib_sge) * srq->rq.max_sge + - sizeof(struct ipath_rwqe); - srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz); - if (!srq->rq.wq) { - ret = ERR_PTR(-ENOMEM); - goto bail_srq; - } - - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. - */ - if (udata && udata->outlen >= sizeof(__u64)) { - int err; - u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz; - - srq->ip = - ipath_create_mmap_info(dev, s, - ibpd->uobject->context, - srq->rq.wq); - if (!srq->ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wq; - } - - err = ib_copy_to_udata(udata, &srq->ip->offset, - sizeof(srq->ip->offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_ip; - } - } else - srq->ip = NULL; - - /* - * ib_create_srq() will initialize srq->ibsrq. - */ - spin_lock_init(&srq->rq.lock); - srq->rq.wq->head = 0; - srq->rq.wq->tail = 0; - srq->limit = srq_init_attr->attr.srq_limit; - - spin_lock(&dev->n_srqs_lock); - if (dev->n_srqs_allocated == ib_ipath_max_srqs) { - spin_unlock(&dev->n_srqs_lock); - ret = ERR_PTR(-ENOMEM); - goto bail_ip; - } - - dev->n_srqs_allocated++; - spin_unlock(&dev->n_srqs_lock); - - if (srq->ip) { - spin_lock_irq(&dev->pending_lock); - list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - - ret = &srq->ibsrq; - goto done; - -bail_ip: - kfree(srq->ip); -bail_wq: - vfree(srq->rq.wq); -bail_srq: - kfree(srq); -done: - return ret; -} - -/** - * ipath_modify_srq - modify a shared receive queue - * @ibsrq: the SRQ to modify - * @attr: the new attributes of the SRQ - * @attr_mask: indicates which attributes to modify - * @udata: user data for ipathverbs.so - */ -int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_rwq *wq; - int ret = 0; - - if (attr_mask & IB_SRQ_MAX_WR) { - struct ipath_rwq *owq; - struct ipath_rwqe *p; - u32 sz, size, n, head, tail; - - /* Check that the requested sizes are below the limits. */ - if ((attr->max_wr > ib_ipath_max_srq_wrs) || - ((attr_mask & IB_SRQ_LIMIT) ? - attr->srq_limit : srq->limit) > attr->max_wr) { - ret = -EINVAL; - goto bail; - } - - sz = sizeof(struct ipath_rwqe) + - srq->rq.max_sge * sizeof(struct ib_sge); - size = attr->max_wr + 1; - wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz); - if (!wq) { - ret = -ENOMEM; - goto bail; - } - - /* Check that we can write the offset to mmap. */ - if (udata && udata->inlen >= sizeof(__u64)) { - __u64 offset_addr; - __u64 offset = 0; - - ret = ib_copy_from_udata(&offset_addr, udata, - sizeof(offset_addr)); - if (ret) - goto bail_free; - udata->outbuf = - (void __user *) (unsigned long) offset_addr; - ret = ib_copy_to_udata(udata, &offset, - sizeof(offset)); - if (ret) - goto bail_free; - } - - spin_lock_irq(&srq->rq.lock); - /* - * validate head pointer value and compute - * the number of remaining WQEs. - */ - owq = srq->rq.wq; - head = owq->head; - if (head >= srq->rq.size) - head = 0; - tail = owq->tail; - if (tail >= srq->rq.size) - tail = 0; - n = head; - if (n < tail) - n += srq->rq.size - tail; - else - n -= tail; - if (size <= n) { - ret = -EINVAL; - goto bail_unlock; - } - n = 0; - p = wq->wq; - while (tail != head) { - struct ipath_rwqe *wqe; - int i; - - wqe = get_rwqe_ptr(&srq->rq, tail); - p->wr_id = wqe->wr_id; - p->num_sge = wqe->num_sge; - for (i = 0; i < wqe->num_sge; i++) - p->sg_list[i] = wqe->sg_list[i]; - n++; - p = (struct ipath_rwqe *)((char *) p + sz); - if (++tail >= srq->rq.size) - tail = 0; - } - srq->rq.wq = wq; - srq->rq.size = size; - wq->head = n; - wq->tail = 0; - if (attr_mask & IB_SRQ_LIMIT) - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - - vfree(owq); - - if (srq->ip) { - struct ipath_mmap_info *ip = srq->ip; - struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); - u32 s = sizeof(struct ipath_rwq) + size * sz; - - ipath_update_mmap_info(dev, ip, s, wq); - - /* - * Return the offset to mmap. - * See ipath_mmap() for details. - */ - if (udata && udata->inlen >= sizeof(__u64)) { - ret = ib_copy_to_udata(udata, &ip->offset, - sizeof(ip->offset)); - if (ret) - goto bail; - } - - spin_lock_irq(&dev->pending_lock); - if (list_empty(&ip->pending_mmaps)) - list_add(&ip->pending_mmaps, - &dev->pending_mmaps); - spin_unlock_irq(&dev->pending_lock); - } - } else if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irq(&srq->rq.lock); - if (attr->srq_limit >= srq->rq.size) - ret = -EINVAL; - else - srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); - } - goto bail; - -bail_unlock: - spin_unlock_irq(&srq->rq.lock); -bail_free: - vfree(wq); -bail: - return ret; -} - -int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - - attr->max_wr = srq->rq.size - 1; - attr->max_sge = srq->rq.max_sge; - attr->srq_limit = srq->limit; - return 0; -} - -/** - * ipath_destroy_srq - destroy a shared receive queue - * @ibsrq: the SRQ to destroy - */ -int ipath_destroy_srq(struct ib_srq *ibsrq) -{ - struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_ibdev *dev = to_idev(ibsrq->device); - - spin_lock(&dev->n_srqs_lock); - dev->n_srqs_allocated--; - spin_unlock(&dev->n_srqs_lock); - if (srq->ip) - kref_put(&srq->ip->ref, ipath_release_mmap_info); - else - vfree(srq->rq.wq); - kfree(srq); - - return 0; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_stats.c b/kernel/drivers/infiniband/hw/ipath/ipath_stats.c deleted file mode 100644 index f63e143e3..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_stats.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ipath_kernel.h" - -struct infinipath_stats ipath_stats; - -/** - * ipath_snap_cntr - snapshot a chip counter - * @dd: the infinipath device - * @creg: the counter to snapshot - * - * called from add_timer and user counter read calls, to deal with - * counters that wrap in "human time". The words sent and received, and - * the packets sent and received are all that we worry about. For now, - * at least, we don't worry about error counters, because if they wrap - * that quickly, we probably don't care. We may eventually just make this - * handle all the counters. word counters can wrap in about 20 seconds - * of full bandwidth traffic, packet counters in a few hours. - */ - -u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) -{ - u32 val, reg64 = 0; - u64 val64; - unsigned long t0, t1; - u64 ret; - - t0 = jiffies; - /* If fast increment counters are only 32 bits, snapshot them, - * and maintain them as 64bit values in the driver */ - if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) && - (creg == dd->ipath_cregs->cr_wordsendcnt || - creg == dd->ipath_cregs->cr_wordrcvcnt || - creg == dd->ipath_cregs->cr_pktsendcnt || - creg == dd->ipath_cregs->cr_pktrcvcnt)) { - val64 = ipath_read_creg(dd, creg); - val = val64 == ~0ULL ? ~0U : 0; - reg64 = 1; - } else /* val64 just to keep gcc quiet... */ - val64 = val = ipath_read_creg32(dd, creg); - /* - * See if a second has passed. This is just a way to detect things - * that are quite broken. Normally this should take just a few - * cycles (the check is for long enough that we don't care if we get - * pre-empted.) An Opteron HT O read timeout is 4 seconds with - * normal NB values - */ - t1 = jiffies; - if (time_before(t0 + HZ, t1) && val == -1) { - ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n", - creg); - ret = 0ULL; - goto bail; - } - if (reg64) { - ret = val64; - goto bail; - } - - if (creg == dd->ipath_cregs->cr_wordsendcnt) { - if (val != dd->ipath_lastsword) { - dd->ipath_sword += val - dd->ipath_lastsword; - dd->ipath_lastsword = val; - } - val64 = dd->ipath_sword; - } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { - if (val != dd->ipath_lastrword) { - dd->ipath_rword += val - dd->ipath_lastrword; - dd->ipath_lastrword = val; - } - val64 = dd->ipath_rword; - } else if (creg == dd->ipath_cregs->cr_pktsendcnt) { - if (val != dd->ipath_lastspkts) { - dd->ipath_spkts += val - dd->ipath_lastspkts; - dd->ipath_lastspkts = val; - } - val64 = dd->ipath_spkts; - } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) { - if (val != dd->ipath_lastrpkts) { - dd->ipath_rpkts += val - dd->ipath_lastrpkts; - dd->ipath_lastrpkts = val; - } - val64 = dd->ipath_rpkts; - } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) { - if (dd->ibdeltainprog) - val64 -= val64 - dd->ibsymsnap; - val64 -= dd->ibsymdelta; - } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) { - if (dd->ibdeltainprog) - val64 -= val64 - dd->iblnkerrsnap; - val64 -= dd->iblnkerrdelta; - } else - val64 = (u64) val; - - ret = val64; - -bail: - return ret; -} - -/** - * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports - * @dd: the infinipath device - * - * print the delta of egrfull/hdrqfull errors for kernel ports no more than - * every 5 seconds. User processes are printed at close, but kernel doesn't - * close, so... Separate routine so may call from other places someday, and - * so function name when printed by _IPATH_INFO is meaningfull - */ -static void ipath_qcheck(struct ipath_devdata *dd) -{ - static u64 last_tot_hdrqfull; - struct ipath_portdata *pd = dd->ipath_pd[0]; - size_t blen = 0; - char buf[128]; - u32 hdrqtail; - - *buf = 0; - if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) { - blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", - pd->port_hdrqfull - - dd->ipath_p0_hdrqfull); - dd->ipath_p0_hdrqfull = pd->port_hdrqfull; - } - if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { - blen += snprintf(buf + blen, sizeof buf - blen, - "%srcvegrfull %llu", - blen ? ", " : "", - (unsigned long long) - (ipath_stats.sps_etidfull - - dd->ipath_last_tidfull)); - dd->ipath_last_tidfull = ipath_stats.sps_etidfull; - } - - /* - * this is actually the number of hdrq full interrupts, not actual - * events, but at the moment that's mostly what I'm interested in. - * Actual count, etc. is in the counters, if needed. For production - * users this won't ordinarily be printed. - */ - - if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) && - ipath_stats.sps_hdrqfull != last_tot_hdrqfull) { - blen += snprintf(buf + blen, sizeof buf - blen, - "%shdrqfull %llu (all ports)", - blen ? ", " : "", - (unsigned long long) - (ipath_stats.sps_hdrqfull - - last_tot_hdrqfull)); - last_tot_hdrqfull = ipath_stats.sps_hdrqfull; - } - if (blen) - ipath_dbg("%s\n", buf); - - hdrqtail = ipath_get_hdrqtail(pd); - if (pd->port_head != hdrqtail) { - if (dd->ipath_lastport0rcv_cnt == - ipath_stats.sps_port0pkts) { - ipath_cdbg(PKT, "missing rcv interrupts? " - "port0 hd=%x tl=%x; port0pkts %llx; write" - " hd (w/intr)\n", - pd->port_head, hdrqtail, - (unsigned long long) - ipath_stats.sps_port0pkts); - ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail | - dd->ipath_rhdrhead_intr_off, pd->port_port); - } - dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts; - } -} - -static void ipath_chk_errormask(struct ipath_devdata *dd) -{ - static u32 fixed; - u32 ctrl; - unsigned long errormask; - unsigned long hwerrs; - - if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED)) - return; - - errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); - - if (errormask == dd->ipath_errormask) - return; - fixed++; - - hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - - if ((hwerrs & dd->ipath_hwerrmask) || - (ctrl & INFINIPATH_C_FREEZEMODE)) { - /* force re-interrupt of pending events, just in case */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); - dev_info(&dd->pcidev->dev, - "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n", - fixed, errormask, (unsigned long)dd->ipath_errormask, - ctrl, hwerrs); - } else - ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n", - fixed, errormask, - (unsigned long)dd->ipath_errormask); -} - - -/** - * ipath_get_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the infinipath device ipath_devdata - * - * called from add_timer - */ -void ipath_get_faststats(unsigned long opaque) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - int i; - static unsigned cnt; - unsigned long flags; - u64 traffic_wds; - - /* - * don't access the chip while running diags, or memory diags can - * fail - */ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) || - ipath_diag_inuse) - /* but re-arm the timer, for diags case; won't hurt other */ - goto done; - - /* - * We now try to maintain a "active timer", based on traffic - * exceeding a threshold, so we need to check the word-counts - * even if they are 64-bit. - */ - traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); - traffic_wds -= dd->ipath_traffic_wds; - dd->ipath_traffic_wds += traffic_wds; - if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) - atomic_add(5, &dd->ipath_active_time); /* S/B #define */ - spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); - - if (dd->ipath_flags & IPATH_32BITCOUNTERS) { - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - } - - ipath_qcheck(dd); - - /* - * deal with repeat error suppression. Doesn't really matter if - * last error was almost a full interval ago, or just a few usecs - * ago; still won't get more than 2 per interval. We may want - * longer intervals for this eventually, could do with mod, counter - * or separate timer. Also see code in ipath_handle_errors() and - * ipath_handle_hwerrors(). - */ - - if (dd->ipath_lasterror) - dd->ipath_lasterror = 0; - if (dd->ipath_lasthwerror) - dd->ipath_lasthwerror = 0; - if (dd->ipath_maskederrs - && time_after(jiffies, dd->ipath_unmasktime)) { - char ebuf[256]; - int iserr; - iserr = ipath_decode_err(dd, ebuf, sizeof ebuf, - dd->ipath_maskederrs); - if (dd->ipath_maskederrs & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_PKTERRS)) - ipath_dev_err(dd, "Re-enabling masked errors " - "(%s)\n", ebuf); - else { - /* - * rcvegrfull and rcvhdrqfull are "normal", for some - * types of processes (mostly benchmarks) that send - * huge numbers of messages, while not processing - * them. So only complain about these at debug - * level. - */ - if (iserr) - ipath_dbg( - "Re-enabling queue full errors (%s)\n", - ebuf); - else - ipath_cdbg(ERRPKT, "Re-enabling packet" - " problem interrupt (%s)\n", ebuf); - } - - /* re-enable masked errors */ - dd->ipath_errormask |= dd->ipath_maskederrs; - ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, - dd->ipath_errormask); - dd->ipath_maskederrs = 0; - } - - /* limit qfull messages to ~one per minute per port */ - if ((++cnt & 0x10)) { - for (i = (int) dd->ipath_cfgports; --i >= 0; ) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - - if (pd && pd->port_lastrcvhdrqtail != -1) - pd->port_lastrcvhdrqtail = -1; - } - } - - ipath_chk_errormask(dd); -done: - mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_sysfs.c b/kernel/drivers/infiniband/hw/ipath/ipath_sysfs.c deleted file mode 100644 index 75558f33f..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ /dev/null @@ -1,1238 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/ctype.h> -#include <linux/stat.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -/** - * ipath_parse_ushort - parse an unsigned short value in an arbitrary base - * @str: the string containing the number - * @valp: where to put the result - * - * returns the number of bytes consumed, or negative value on error - */ -int ipath_parse_ushort(const char *str, unsigned short *valp) -{ - unsigned long val; - char *end; - int ret; - - if (!isdigit(str[0])) { - ret = -EINVAL; - goto bail; - } - - val = simple_strtoul(str, &end, 0); - - if (val > 0xffff) { - ret = -EINVAL; - goto bail; - } - - *valp = val; - - ret = end + 1 - str; - if (ret == 0) - ret = -EINVAL; - -bail: - return ret; -} - -static ssize_t show_version(struct device_driver *dev, char *buf) -{ - /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version); -} - -static ssize_t show_num_units(struct device_driver *dev, char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%d\n", - ipath_count_units(NULL, NULL, NULL)); -} - -static ssize_t show_status(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - - if (!dd->ipath_statusp) { - ret = -EINVAL; - goto bail; - } - - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(dd->ipath_statusp)); - -bail: - return ret; -} - -static const char *ipath_status_str[] = { - "Initted", - "Disabled", - "Admin_Disabled", - "", /* This used to be the old "OIB_SMA" status. */ - "", /* This used to be the old "SMA" status. */ - "Present", - "IB_link_up", - "IB_configured", - "NoIBcable", - "Fatal_Hardware_Error", - NULL, -}; - -static ssize_t show_status_str(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int i, any; - u64 s; - ssize_t ret; - - if (!dd->ipath_statusp) { - ret = -EINVAL; - goto bail; - } - - s = *(dd->ipath_statusp); - *buf = '\0'; - for (any = i = 0; s && ipath_status_str[i]; i++) { - if (s & 1) { - if (any && strlcat(buf, " ", PAGE_SIZE) >= - PAGE_SIZE) - /* overflow */ - break; - if (strlcat(buf, ipath_status_str[i], - PAGE_SIZE) >= PAGE_SIZE) - break; - any = 1; - } - s >>= 1; - } - if (any) - strlcat(buf, "\n", PAGE_SIZE); - - ret = strlen(buf); - -bail: - return ret; -} - -static ssize_t show_boardversion(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion); -} - -static ssize_t show_localbus_info(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info); -} - -static ssize_t show_lmc(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc); -} - -static ssize_t store_lmc(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 lmc = 0; - int ret; - - ret = ipath_parse_ushort(buf, &lmc); - if (ret < 0) - goto invalid; - - if (lmc > 7) { - ret = -EINVAL; - goto invalid; - } - - ipath_set_lid(dd, dd->ipath_lid, lmc); - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc); -bail: - return ret; -} - -static ssize_t show_lid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid); -} - -static ssize_t store_lid(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 lid = 0; - int ret; - - ret = ipath_parse_ushort(buf, &lid); - if (ret < 0) - goto invalid; - - if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) { - ret = -EINVAL; - goto invalid; - } - - ipath_set_lid(dd, lid, dd->ipath_lmc); - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid); -bail: - return ret; -} - -static ssize_t show_mlid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid); -} - -static ssize_t store_mlid(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 mlid; - int ret; - - ret = ipath_parse_ushort(buf, &mlid); - if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE) - goto invalid; - - dd->ipath_mlid = mlid; - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid MLID\n"); -bail: - return ret; -} - -static ssize_t show_guid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u8 *guid; - - guid = (u8 *) & (dd->ipath_guid); - - return scnprintf(buf, PAGE_SIZE, - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", - guid[0], guid[1], guid[2], guid[3], - guid[4], guid[5], guid[6], guid[7]); -} - -static ssize_t store_guid(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - unsigned short guid[8]; - __be64 new_guid; - u8 *ng; - int i; - - if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", - &guid[0], &guid[1], &guid[2], &guid[3], - &guid[4], &guid[5], &guid[6], &guid[7]) != 8) - goto invalid; - - ng = (u8 *) &new_guid; - - for (i = 0; i < 8; i++) { - if (guid[i] > 0xff) - goto invalid; - ng[i] = guid[i]; - } - - if (new_guid == 0) - goto invalid; - - dd->ipath_guid = new_guid; - dd->ipath_nguid = 1; - if (dd->verbs_dev) - dd->verbs_dev->ibdev.node_guid = new_guid; - - ret = strlen(buf); - goto bail; - -invalid: - ipath_dev_err(dd, "attempt to set invalid GUID\n"); - ret = -EINVAL; - -bail: - return ret; -} - -static ssize_t show_nguid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); -} - -static ssize_t show_nports(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - /* Return the number of user ports available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1); -} - -static ssize_t show_serial(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - buf[sizeof dd->ipath_serial] = '\0'; - memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial); - strcat(buf, "\n"); - return strlen(buf); -} - -static ssize_t show_unit(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); -} - -static ssize_t show_jint_max_packets(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets); -} - -static ssize_t store_jint_max_packets(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 v = 0; - int ret; - - ret = ipath_parse_ushort(buf, &v); - if (ret < 0) - ipath_dev_err(dd, "invalid jint_max_packets.\n"); - else - dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v); - - return ret; -} - -static ssize_t show_jint_idle_ticks(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - - return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks); -} - -static ssize_t store_jint_idle_ticks(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - u16 v = 0; - int ret; - - ret = ipath_parse_ushort(buf, &v); - if (ret < 0) - ipath_dev_err(dd, "invalid jint_idle_ticks.\n"); - else - dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets); - - return ret; -} - -#define DEVICE_COUNTER(name, attr) \ - static ssize_t show_counter_##name(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ - { \ - struct ipath_devdata *dd = dev_get_drvdata(dev); \ - return scnprintf(\ - buf, PAGE_SIZE, "%llu\n", (unsigned long long) \ - ipath_snap_cntr( \ - dd, offsetof(struct infinipath_counters, \ - attr) / sizeof(u64))); \ - } \ - static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL); - -DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt); -DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt); -DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt); -DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt); -DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt); -DEVICE_COUNTER(lb_ints, LBIntCnt); -DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt); -DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt); -DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt); -DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt); -DEVICE_COUNTER(rx_dwords, RxDwordCnt); -DEVICE_COUNTER(rx_ebps, RxEBPCnt); -DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt); -DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt); -DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt); -DEVICE_COUNTER(rx_len_errs, RxLenErrCnt); -DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt); -DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt); -DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt); -DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt); -DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt); -DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt); -DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt); -DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt); -DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt); -DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt); -DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt); -DEVICE_COUNTER(tx_dwords, TxDwordCnt); -DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt); -DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt); -DEVICE_COUNTER(tx_len_errs, TxLenErrCnt); -DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt); -DEVICE_COUNTER(tx_underruns, TxUnderrunCnt); -DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt); - -static struct attribute *dev_counter_attributes[] = { - &dev_attr_ib_link_downeds.attr, - &dev_attr_ib_link_err_recoveries.attr, - &dev_attr_ib_status_changes.attr, - &dev_attr_ib_symbol_errs.attr, - &dev_attr_lb_flow_stalls.attr, - &dev_attr_lb_ints.attr, - &dev_attr_rx_bad_formats.attr, - &dev_attr_rx_buf_ovfls.attr, - &dev_attr_rx_data_pkts.attr, - &dev_attr_rx_dropped_pkts.attr, - &dev_attr_rx_dwords.attr, - &dev_attr_rx_ebps.attr, - &dev_attr_rx_flow_ctrl_errs.attr, - &dev_attr_rx_flow_pkts.attr, - &dev_attr_rx_icrc_errs.attr, - &dev_attr_rx_len_errs.attr, - &dev_attr_rx_link_problems.attr, - &dev_attr_rx_lpcrc_errs.attr, - &dev_attr_rx_max_min_len_errs.attr, - &dev_attr_rx_p0_hdr_egr_ovfls.attr, - &dev_attr_rx_p1_hdr_egr_ovfls.attr, - &dev_attr_rx_p2_hdr_egr_ovfls.attr, - &dev_attr_rx_p3_hdr_egr_ovfls.attr, - &dev_attr_rx_p4_hdr_egr_ovfls.attr, - &dev_attr_rx_p5_hdr_egr_ovfls.attr, - &dev_attr_rx_p6_hdr_egr_ovfls.attr, - &dev_attr_rx_p7_hdr_egr_ovfls.attr, - &dev_attr_rx_p8_hdr_egr_ovfls.attr, - &dev_attr_rx_pkey_mismatches.attr, - &dev_attr_rx_tid_full_errs.attr, - &dev_attr_rx_tid_valid_errs.attr, - &dev_attr_rx_vcrc_errs.attr, - &dev_attr_tx_data_pkts.attr, - &dev_attr_tx_dropped_pkts.attr, - &dev_attr_tx_dwords.attr, - &dev_attr_tx_flow_pkts.attr, - &dev_attr_tx_flow_stalls.attr, - &dev_attr_tx_len_errs.attr, - &dev_attr_tx_max_min_len_errs.attr, - &dev_attr_tx_underruns.attr, - &dev_attr_tx_unsup_vl_errs.attr, - NULL -}; - -static struct attribute_group dev_counter_attr_group = { - .name = "counters", - .attrs = dev_counter_attributes -}; - -static ssize_t store_reset(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - if (count < 5 || memcmp(buf, "reset", 5)) { - ret = -EINVAL; - goto bail; - } - - if (dd->ipath_flags & IPATH_DISABLED) { - /* - * post-reset init would re-enable interrupts, etc. - * so don't allow reset on disabled devices. Not - * perfect error, but about the best choice. - */ - dev_info(dev,"Unit %d is disabled, can't reset\n", - dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - ret = ipath_reset_device(dd->ipath_unit); -bail: - return ret<0 ? ret : count; -} - -static ssize_t store_link_state(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 state; - - ret = ipath_parse_ushort(buf, &state); - if (ret < 0) - goto invalid; - - r = ipath_set_linkstate(dd, state); - if (r < 0) { - ret = r; - goto bail; - } - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid link state\n"); -bail: - return ret; -} - -static ssize_t show_mtu(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu); -} - -static ssize_t store_mtu(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - u16 mtu = 0; - int r; - - ret = ipath_parse_ushort(buf, &mtu); - if (ret < 0) - goto invalid; - - r = ipath_set_mtu(dd, mtu); - if (r < 0) - ret = r; - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid MTU\n"); -bail: - return ret; -} - -static ssize_t show_enabled(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1); -} - -static ssize_t store_enabled(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - ssize_t ret; - u16 enable = 0; - - ret = ipath_parse_ushort(buf, &enable); - if (ret < 0) { - ipath_dev_err(dd, "attempt to use non-numeric on enable\n"); - goto bail; - } - - if (enable) { - if (!(dd->ipath_flags & IPATH_DISABLED)) - goto bail; - - dev_info(dev, "Enabling unit %d\n", dd->ipath_unit); - /* same as post-reset */ - ret = ipath_init_chip(dd, 1); - if (ret) - ipath_dev_err(dd, "Failed to enable unit %d\n", - dd->ipath_unit); - else { - dd->ipath_flags &= ~IPATH_DISABLED; - *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED; - } - } - else if (!(dd->ipath_flags & IPATH_DISABLED)) { - dev_info(dev, "Disabling unit %d\n", dd->ipath_unit); - ipath_shutdown_device(dd); - dd->ipath_flags |= IPATH_DISABLED; - *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED; - } - -bail: - return ret; -} - -static ssize_t store_rx_pol_inv(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret < 0) - goto invalid; - - r = ipath_set_rx_pol_inv(dd, val); - if (r < 0) { - ret = r; - goto bail; - } - - goto bail; -invalid: - ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n"); -bail: - return ret; -} - -static ssize_t store_led_override(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret > 0) - ipath_set_led_override(dd, val); - else - ipath_dev_err(dd, "attempt to set invalid LED override\n"); - return ret; -} - -static ssize_t show_logged_errs(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int idx, count; - - /* force consistency with actual EEPROM */ - if (ipath_update_eeprom_log(dd) != 0) - return -ENXIO; - - count = 0; - for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { - count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", - dd->ipath_eep_st_errs[idx], - idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); - } - - return count; -} - -/* - * New sysfs entries to control various IB config. These all turn into - * accesses via ipath_f_get/set_ib_cfg. - * - * Get/Set heartbeat enable. Or of 1=enabled, 2=auto - */ -static ssize_t show_hrtbt_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_hrtbt_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && val > 3) - ret = -EINVAL; - if (ret < 0) { - ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); - goto bail; - } - - /* - * Set the "intentional" heartbeat enable per either of - * "Enable" and "Auto", as these are normally set together. - * This bit is consulted when leaving loopback mode, - * because entering loopback mode overrides it and automatically - * disables heartbeat. - */ - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val); - if (r < 0) - ret = r; - else if (val == IPATH_IB_HRTBT_OFF) - dd->ipath_flags |= IPATH_NO_HRTBT; - else - dd->ipath_flags &= ~IPATH_NO_HRTBT; - -bail: - return ret; -} - -/* - * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric, - * _not_ the particular encoding of any given chip) - */ -static ssize_t show_lwid_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_lwid_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && (val == 0 || val > 3)) - ret = -EINVAL; - if (ret < 0) { - ipath_dev_err(dd, - "attempt to set invalid Link Width (enable)\n"); - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -/* Get current link width */ -static ssize_t show_lwid(struct device *dev, - struct device_attribute *attr, - char *buf) - -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -/* - * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR. - */ -static ssize_t show_spd_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_spd_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR))) - ret = -EINVAL; - if (ret < 0) { - ipath_dev_err(dd, - "attempt to set invalid Link Speed (enable)\n"); - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -/* Get current link speed */ -static ssize_t show_spd(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -/* - * Get/Set RX polarity-invert enable. 0=no, 1=yes. - */ -static ssize_t show_rx_polinv_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_rx_polinv_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && val > 1) { - ipath_dev_err(dd, - "attempt to set invalid Rx Polarity (enable)\n"); - ret = -EINVAL; - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -/* - * Get/Set RX lane-reversal enable. 0=no, 1=yes. - */ -static ssize_t show_lanerev_enb(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - - ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB); - if (ret >= 0) - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; -} - -static ssize_t store_lanerev_enb(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, r; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret >= 0 && val > 1) { - ret = -EINVAL; - ipath_dev_err(dd, - "attempt to set invalid Lane reversal (enable)\n"); - goto bail; - } - - r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val); - if (r < 0) - ret = r; - -bail: - return ret; -} - -static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); -static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); - -static struct attribute *driver_attributes[] = { - &driver_attr_num_units.attr, - &driver_attr_version.attr, - NULL -}; - -static struct attribute_group driver_attr_group = { - .attrs = driver_attributes -}; - -static ssize_t store_tempsense(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret, stat; - u16 val; - - ret = ipath_parse_ushort(buf, &val); - if (ret <= 0) { - ipath_dev_err(dd, "attempt to set invalid tempsense config\n"); - goto bail; - } - /* If anything but the highest limit, enable T_CRIT_A "interrupt" */ - stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0); - if (stat) { - ipath_dev_err(dd, "Unable to set tempsense config\n"); - ret = -1; - goto bail; - } - stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF)); - if (stat) { - ipath_dev_err(dd, "Unable to set local Tcrit\n"); - ret = -1; - goto bail; - } - stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8)); - if (stat) { - ipath_dev_err(dd, "Unable to set remote Tcrit\n"); - ret = -1; - goto bail; - } - -bail: - return ret; -} - -/* - * dump tempsense regs. in decimal, to ease shell-scripts. - */ -static ssize_t show_tempsense(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ipath_devdata *dd = dev_get_drvdata(dev); - int ret; - int idx; - u8 regvals[8]; - - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) - continue; - ret = ipath_tempsense_read(dd, idx); - if (ret < 0) - break; - regvals[idx] = ret; - } - if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; -} - -const struct attribute_group *ipath_driver_attr_groups[] = { - &driver_attr_group, - NULL, -}; - -static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); -static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc); -static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); -static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state); -static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); -static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); -static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); -static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); -static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL); -static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); -static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); -static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); -static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); -static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); -static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); -static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); -static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); -static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); -static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); -static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO, - show_jint_max_packets, store_jint_max_packets); -static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO, - show_jint_idle_ticks, store_jint_idle_ticks); -static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO, - show_tempsense, store_tempsense); - -static struct attribute *dev_attributes[] = { - &dev_attr_guid.attr, - &dev_attr_lmc.attr, - &dev_attr_lid.attr, - &dev_attr_link_state.attr, - &dev_attr_mlid.attr, - &dev_attr_mtu.attr, - &dev_attr_nguid.attr, - &dev_attr_nports.attr, - &dev_attr_serial.attr, - &dev_attr_status.attr, - &dev_attr_status_str.attr, - &dev_attr_boardversion.attr, - &dev_attr_unit.attr, - &dev_attr_enabled.attr, - &dev_attr_rx_pol_inv.attr, - &dev_attr_led_override.attr, - &dev_attr_logged_errors.attr, - &dev_attr_tempsense.attr, - &dev_attr_localbus_info.attr, - NULL -}; - -static struct attribute_group dev_attr_group = { - .attrs = dev_attributes -}; - -static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, - store_hrtbt_enb); -static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb, - store_lwid_enb); -static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL); -static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb, - store_spd_enb); -static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL); -static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb, - store_rx_polinv_enb); -static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb, - store_lanerev_enb); - -static struct attribute *dev_ibcfg_attributes[] = { - &dev_attr_hrtbt_enable.attr, - &dev_attr_link_width_enable.attr, - &dev_attr_link_width.attr, - &dev_attr_link_speed_enable.attr, - &dev_attr_link_speed.attr, - &dev_attr_rx_pol_inv_enable.attr, - &dev_attr_rx_lane_rev_enable.attr, - NULL -}; - -static struct attribute_group dev_ibcfg_attr_group = { - .attrs = dev_ibcfg_attributes -}; - -/** - * ipath_expose_reset - create a device reset file - * @dev: the device structure - * - * Only expose a file that lets us reset the device after someone - * enters diag mode. A device reset is quite likely to crash the - * machine entirely, so we don't want to normally make it - * available. - * - * Called with ipath_mutex held. - */ -int ipath_expose_reset(struct device *dev) -{ - static int exposed; - int ret; - - if (!exposed) { - ret = device_create_file(dev, &dev_attr_reset); - exposed = 1; - } - else - ret = 0; - - return ret; -} - -int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) -{ - int ret; - - ret = sysfs_create_group(&dev->kobj, &dev_attr_group); - if (ret) - goto bail; - - ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group); - if (ret) - goto bail_attrs; - - if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { - ret = device_create_file(dev, &dev_attr_jint_idle_ticks); - if (ret) - goto bail_counter; - ret = device_create_file(dev, &dev_attr_jint_max_packets); - if (ret) - goto bail_idle; - - ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group); - if (ret) - goto bail_max; - } - - return 0; - -bail_max: - device_remove_file(dev, &dev_attr_jint_max_packets); -bail_idle: - device_remove_file(dev, &dev_attr_jint_idle_ticks); -bail_counter: - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); -bail_attrs: - sysfs_remove_group(&dev->kobj, &dev_attr_group); -bail: - return ret; -} - -void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) -{ - sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); - - if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) { - sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group); - device_remove_file(dev, &dev_attr_jint_idle_ticks); - device_remove_file(dev, &dev_attr_jint_max_packets); - } - - sysfs_remove_group(&dev->kobj, &dev_attr_group); - - device_remove_file(dev, &dev_attr_reset); -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_uc.c b/kernel/drivers/infiniband/hw/ipath/ipath_uc.c deleted file mode 100644 index 22e60998f..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_uc.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_UC_##x - -/** - * ipath_make_uc_req - construct a request packet (SEND, RDMA write) - * @qp: a pointer to the QP - * - * Return 1 if constructed; otherwise, return 0. - */ -int ipath_make_uc_req(struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - struct ipath_swqe *wqe; - unsigned long flags; - u32 hwords; - u32 bth0; - u32 len; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - int ret = 0; - - spin_lock_irqsave(&qp->s_lock, flags); - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { - if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - wqe = get_swqe_ptr(qp, qp->s_last); - ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; - } - - ohdr = &qp->s_hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; - - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; - bth0 = 1 << 22; /* Set M bit */ - - /* Get the next send request. */ - wqe = get_swqe_ptr(qp, qp->s_cur); - qp->s_wqe = NULL; - switch (qp->s_state) { - default: - if (!(ib_ipath_state_ops[qp->state] & - IPATH_PROCESS_NEXT_SEND_OK)) - goto bail; - /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) - goto bail; - /* - * Start a new request. - */ - qp->s_psn = wqe->psn = qp->s_next_psn; - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - qp->s_len = len = wqe->length; - switch (wqe->wr.opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - if (len > pmtu) { - qp->s_state = OP(SEND_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_ONLY); - else { - qp->s_state = - OP(SEND_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / 4; - if (len > pmtu) { - qp->s_state = OP(RDMA_WRITE_FIRST); - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_ONLY); - else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); - /* Immediate data comes after the RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - default: - goto bail; - } - break; - - case OP(SEND_FIRST): - qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_SEND) - qp->s_state = OP(SEND_LAST); - else { - qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - - case OP(RDMA_WRITE_FIRST): - qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ - case OP(RDMA_WRITE_MIDDLE): - len = qp->s_len; - if (len > pmtu) { - len = pmtu; - break; - } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) - qp->s_state = OP(RDMA_WRITE_LAST); - else { - qp->s_state = - OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); - /* Immediate data comes after the BTH */ - ohdr->u.imm_data = wqe->wr.ex.imm_data; - hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - } - qp->s_wqe = wqe; - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; - break; - } - qp->s_len -= len; - qp->s_hdrwords = hwords; - qp->s_cur_sge = &qp->s_sge; - qp->s_cur_size = len; - ipath_make_ruc_header(to_idev(qp->ibqp.device), - qp, ohdr, bth0 | (qp->s_state << 24), - qp->s_next_psn++ & IPATH_PSN_MASK); -done: - ret = 1; - goto unlock; - -bail: - qp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * ipath_uc_rcv - handle an incoming UC packet - * @dev: the device the packet came in on - * @hdr: the header of the packet - * @has_grh: true if the packet has a GRH - * @data: the packet data - * @tlen: the length of the packet - * @qp: the QP for this packet. - * - * This is called from ipath_qp_rcv() to process an incoming UC packet - * for the given QP. - * Called at interrupt level. - */ -void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - int opcode; - u32 hdrsize; - u32 psn; - u32 pad; - struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - struct ib_reth *reth; - int header_in_data; - - /* Validate the SLID. See Ch. 9.6.1.5 */ - if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) - goto done; - - /* Check for GRH */ - if (!has_grh) { - ohdr = &hdr->u.oth; - hdrsize = 8 + 12; /* LRH + BTH */ - psn = be32_to_cpu(ohdr->bth[2]); - header_in_data = 0; - } else { - ohdr = &hdr->u.l.oth; - hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ - /* - * The header with GRH is 60 bytes and the - * core driver sets the eager header buffer - * size to 56 bytes so the last 4 bytes of - * the BTH header (PSN) is in the data buffer. - */ - header_in_data = dev->dd->ipath_rcvhdrentsize == 16; - if (header_in_data) { - psn = be32_to_cpu(((__be32 *) data)[0]); - data += sizeof(__be32); - } else - psn = be32_to_cpu(ohdr->bth[2]); - } - /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - - memset(&wc, 0, sizeof wc); - - /* Compare the PSN verses the expected PSN. */ - if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { - /* - * Handle a sequence error. - * Silently drop any current message. - */ - qp->r_psn = psn; - inv: - qp->r_state = OP(SEND_LAST); - switch (opcode) { - case OP(SEND_FIRST): - case OP(SEND_ONLY): - case OP(SEND_ONLY_WITH_IMMEDIATE): - goto send_first; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_ONLY): - case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): - goto rdma_first; - - default: - dev->n_pkt_drops++; - goto done; - } - } - - /* Check for opcode sequence errors. */ - switch (qp->r_state) { - case OP(SEND_FIRST): - case OP(SEND_MIDDLE): - if (opcode == OP(SEND_MIDDLE) || - opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) - break; - goto inv; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_MIDDLE): - if (opcode == OP(RDMA_WRITE_MIDDLE) || - opcode == OP(RDMA_WRITE_LAST) || - opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) - break; - goto inv; - - default: - if (opcode == OP(SEND_FIRST) || - opcode == OP(SEND_ONLY) || - opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || - opcode == OP(RDMA_WRITE_FIRST) || - opcode == OP(RDMA_WRITE_ONLY) || - opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) - break; - goto inv; - } - - /* OK, process the packet. */ - switch (opcode) { - case OP(SEND_FIRST): - case OP(SEND_ONLY): - case OP(SEND_ONLY_WITH_IMMEDIATE): - send_first: - if (qp->r_flags & IPATH_R_REUSE_SGE) { - qp->r_flags &= ~IPATH_R_REUSE_SGE; - qp->r_sge = qp->s_rdma_read_sge; - } else if (!ipath_get_rwqe(qp, 0)) { - dev->n_pkt_drops++; - goto done; - } - /* Save the WQE so we can reuse it in case of an error. */ - qp->s_rdma_read_sge = qp->r_sge; - qp->r_rcv_len = 0; - if (opcode == OP(SEND_ONLY)) - goto send_last; - else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) - goto send_last_imm; - /* FALLTHROUGH */ - case OP(SEND_MIDDLE): - /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - qp->r_rcv_len += pmtu; - if (unlikely(qp->r_rcv_len > qp->r_len)) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - ipath_copy_sge(&qp->r_sge, data, pmtu); - break; - - case OP(SEND_LAST_WITH_IMMEDIATE): - send_last_imm: - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else { - /* Immediate data comes after BTH */ - wc.ex.imm_data = ohdr->u.imm_data; - } - hdrsize += 4; - wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ - case OP(SEND_LAST): - send_last: - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - wc.byte_len = tlen + qp->r_rcv_len; - if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto done; - } - wc.opcode = IB_WC_RECV; - last_imm: - ipath_copy_sge(&qp->r_sge, data, tlen); - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(1 << 23)) != 0); - break; - - case OP(RDMA_WRITE_FIRST): - case OP(RDMA_WRITE_ONLY): - case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ - rdma_first: - /* RETH comes after BTH */ - if (!header_in_data) - reth = &ohdr->u.rc.reth; - else { - reth = (struct ib_reth *)data; - data += sizeof(*reth); - } - hdrsize += sizeof(*reth); - qp->r_len = be32_to_cpu(reth->length); - qp->r_rcv_len = 0; - if (qp->r_len != 0) { - u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); - int ok; - - /* Check rkey */ - ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, - vaddr, rkey, - IB_ACCESS_REMOTE_WRITE); - if (unlikely(!ok)) { - dev->n_pkt_drops++; - goto done; - } - } else { - qp->r_sge.sg_list = NULL; - qp->r_sge.sge.mr = NULL; - qp->r_sge.sge.vaddr = NULL; - qp->r_sge.sge.length = 0; - qp->r_sge.sge.sge_length = 0; - } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - dev->n_pkt_drops++; - goto done; - } - if (opcode == OP(RDMA_WRITE_ONLY)) - goto rdma_last; - else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) - goto rdma_last_imm; - /* FALLTHROUGH */ - case OP(RDMA_WRITE_MIDDLE): - /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) { - dev->n_pkt_drops++; - goto done; - } - qp->r_rcv_len += pmtu; - if (unlikely(qp->r_rcv_len > qp->r_len)) { - dev->n_pkt_drops++; - goto done; - } - ipath_copy_sge(&qp->r_sge, data, pmtu); - break; - - case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): - rdma_last_imm: - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else { - /* Immediate data comes after BTH */ - wc.ex.imm_data = ohdr->u.imm_data; - } - hdrsize += 4; - wc.wc_flags = IB_WC_WITH_IMM; - - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) { - dev->n_pkt_drops++; - goto done; - } - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { - dev->n_pkt_drops++; - goto done; - } - if (qp->r_flags & IPATH_R_REUSE_SGE) - qp->r_flags &= ~IPATH_R_REUSE_SGE; - else if (!ipath_get_rwqe(qp, 1)) { - dev->n_pkt_drops++; - goto done; - } - wc.byte_len = qp->r_len; - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - goto last_imm; - - case OP(RDMA_WRITE_LAST): - rdma_last: - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - /* Check for invalid length. */ - /* XXX LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) { - dev->n_pkt_drops++; - goto done; - } - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); - if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { - dev->n_pkt_drops++; - goto done; - } - ipath_copy_sge(&qp->r_sge, data, tlen); - break; - - default: - /* Drop packet for unknown opcodes. */ - dev->n_pkt_drops++; - goto done; - } - qp->r_psn++; - qp->r_state = opcode; -done: - return; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_ud.c b/kernel/drivers/infiniband/hw/ipath/ipath_ud.c deleted file mode 100644 index e8a2a9152..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_ud.c +++ /dev/null @@ -1,580 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/sched.h> -#include <rdma/ib_smi.h> - -#include "ipath_verbs.h" -#include "ipath_kernel.h" - -/** - * ipath_ud_loopback - handle send on loopback QPs - * @sqp: the sending QP - * @swqe: the send work request - * - * This is called from ipath_make_ud_req() to forward a WQE addressed - * to the same HCA. - * Note that the receive interrupt handler may be calling ipath_ud_rcv() - * while this is being called. - */ -static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) -{ - struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); - struct ipath_qp *qp; - struct ib_ah_attr *ah_attr; - unsigned long flags; - struct ipath_rq *rq; - struct ipath_srq *srq; - struct ipath_sge_state rsge; - struct ipath_sge *sge; - struct ipath_rwq *wq; - struct ipath_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - struct ib_wc wc; - u32 tail; - u32 rlen; - u32 length; - - qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); - if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - dev->n_pkt_drops++; - goto done; - } - - /* - * Check that the qkey matches (except for QP0, see 9.6.1.4.1). - * Qkeys with the high order bit set mean use the - * qkey from the QP context instead of the WR (see 10.2.5). - */ - if (unlikely(qp->ibqp.qp_num && - ((int) swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) { - /* XXX OK to lose a count once in a while. */ - dev->qkey_violations++; - dev->n_pkt_drops++; - goto drop; - } - - /* - * A GRH is expected to precede the data even if not - * present on the wire. - */ - length = swqe->length; - memset(&wc, 0, sizeof wc); - wc.byte_len = length + sizeof(struct ib_grh); - - if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { - wc.wc_flags = IB_WC_WITH_IMM; - wc.ex.imm_data = swqe->wr.ex.imm_data; - } - - /* - * This would be a lot simpler if we could call ipath_get_rwqe() - * but that uses state that the receive interrupt handler uses - * so we would need to lock out receive interrupts while doing - * local loopback. - */ - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - /* - * Get the next work request entry to find where to put the data. - * Note that it is safe to drop the lock after changing rq->tail - * since ipath_post_receive() won't fill the empty slot. - */ - spin_lock_irqsave(&rq->lock, flags); - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto drop; - } - wqe = get_rwqe_ptr(rq, tail); - rsge.sg_list = qp->r_ud_sg_list; - if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto drop; - } - /* Silently drop packets which are too big. */ - if (wc.byte_len > rlen) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto drop; - } - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - wc.wr_id = wqe->wr_id; - if (handler) { - u32 n; - - /* - * validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - } else - spin_unlock_irqrestore(&rq->lock, flags); - } else - spin_unlock_irqrestore(&rq->lock, flags); - - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; - if (ah_attr->ah_flags & IB_AH_GRH) { - ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); - wc.wc_flags |= IB_WC_GRH; - } else - ipath_skip_sge(&rsge, sizeof(struct ib_grh)); - sge = swqe->sg_list; - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - ipath_copy_sge(&rsge, sge->vaddr, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--swqe->wr.num_sge) - sge++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - length -= len; - } - wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - wc.src_qp = sqp->ibqp.qp_num; - /* XXX do we know which pkey matched? Only needed for GSI. */ - wc.pkey_index = 0; - wc.slid = dev->dd->ipath_lid | - (ah_attr->src_path_bits & - ((1 << dev->dd->ipath_lmc) - 1)); - wc.sl = ah_attr->sl; - wc.dlid_path_bits = - ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1); - wc.port_num = 1; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); -drop: - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -done:; -} - -/** - * ipath_make_ud_req - construct a UD request packet - * @qp: the QP - * - * Return 1 if constructed; otherwise, return 0. - */ -int ipath_make_ud_req(struct ipath_qp *qp) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_other_headers *ohdr; - struct ib_ah_attr *ah_attr; - struct ipath_swqe *wqe; - unsigned long flags; - u32 nwords; - u32 extra_bytes; - u32 bth0; - u16 lrh0; - u16 lid; - int ret = 0; - int next_cur; - - spin_lock_irqsave(&qp->s_lock, flags); - - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { - if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - wqe = get_swqe_ptr(qp, qp->s_last); - ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); - goto done; - } - - if (qp->s_cur == qp->s_head) - goto bail; - - wqe = get_swqe_ptr(qp, qp->s_cur); - next_cur = qp->s_cur + 1; - if (next_cur >= qp->s_size) - next_cur = 0; - - /* Construct the header. */ - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; - if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) { - if (ah_attr->dlid != IPATH_PERMISSIVE_LID) - dev->n_multicast_xmit++; - else - dev->n_unicast_xmit++; - } else { - dev->n_unicast_xmit++; - lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); - if (unlikely(lid == dev->dd->ipath_lid)) { - /* - * If DMAs are in progress, we can't generate - * a completion for the loopback packet since - * it would be out of order. - * XXX Instead of waiting, we could queue a - * zero length descriptor so we get a callback. - */ - if (atomic_read(&qp->s_dma_busy)) { - qp->s_flags |= IPATH_S_WAIT_DMA; - goto bail; - } - qp->s_cur = next_cur; - spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_ud_loopback(qp, wqe); - spin_lock_irqsave(&qp->s_lock, flags); - ipath_send_complete(qp, wqe, IB_WC_SUCCESS); - goto done; - } - } - - qp->s_cur = next_cur; - extra_bytes = -wqe->length & 3; - nwords = (wqe->length + extra_bytes) >> 2; - - /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ - qp->s_hdrwords = 7; - qp->s_cur_size = wqe->length; - qp->s_cur_sge = &qp->s_sge; - qp->s_dmult = ah_attr->static_rate; - qp->s_wqe = wqe; - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - - if (ah_attr->ah_flags & IB_AH_GRH) { - /* Header size in 32-bit words. */ - qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, - &ah_attr->grh, - qp->s_hdrwords, nwords); - lrh0 = IPATH_LRH_GRH; - ohdr = &qp->s_hdr.u.l.oth; - /* - * Don't worry about sending to locally attached multicast - * QPs. It is unspecified by the spec. what happens. - */ - } else { - /* Header size in 32-bit words. */ - lrh0 = IPATH_LRH_BTH; - ohdr = &qp->s_hdr.u.oth; - } - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { - qp->s_hdrwords++; - ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; - bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; - } else - bth0 = IB_OPCODE_UD_SEND_ONLY << 24; - lrh0 |= ah_attr->sl << 4; - if (qp->ibqp.qp_type == IB_QPT_SMI) - lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + - SIZE_OF_CRC); - lid = dev->dd->ipath_lid; - if (lid) { - lid |= ah_attr->src_path_bits & - ((1 << dev->dd->ipath_lmc) - 1); - qp->s_hdr.lrh[3] = cpu_to_be16(lid); - } else - qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= 1 << 23; - bth0 |= extra_bytes << 20; - bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : - ipath_get_pkey(dev->dd, qp->s_pkey_index); - ohdr->bth[0] = cpu_to_be32(bth0); - /* - * Use the multicast QP if the destination LID is a multicast LID. - */ - ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && - ah_attr->dlid != IPATH_PERMISSIVE_LID ? - cpu_to_be32(IPATH_MULTICAST_QPN) : - cpu_to_be32(wqe->wr.wr.ud.remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); - /* - * Qkeys with the high order bit set mean use the - * qkey from the QP context instead of the WR (see 10.2.5). - */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); - ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); - -done: - ret = 1; - goto unlock; - -bail: - qp->s_flags &= ~IPATH_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * ipath_ud_rcv - receive an incoming UD packet - * @dev: the device the packet came in on - * @hdr: the packet header - * @has_grh: true if the packet has a GRH - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on - * - * This is called from ipath_qp_rcv() to process an incoming UD packet - * for the given QP. - * Called at interrupt level. - */ -void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp) -{ - struct ipath_other_headers *ohdr; - int opcode; - u32 hdrsize; - u32 pad; - struct ib_wc wc; - u32 qkey; - u32 src_qp; - u16 dlid; - int header_in_data; - - /* Check for GRH */ - if (!has_grh) { - ohdr = &hdr->u.oth; - hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */ - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); - header_in_data = 0; - } else { - ohdr = &hdr->u.l.oth; - hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ - /* - * The header with GRH is 68 bytes and the core driver sets - * the eager header buffer size to 56 bytes so the last 12 - * bytes of the IB header is in the data buffer. - */ - header_in_data = dev->dd->ipath_rcvhdrentsize == 16; - if (header_in_data) { - qkey = be32_to_cpu(((__be32 *) data)[1]); - src_qp = be32_to_cpu(((__be32 *) data)[2]); - data += 12; - } else { - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); - } - } - src_qp &= IPATH_QPN_MASK; - - /* - * Check that the permissive LID is only used on QP0 - * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). - */ - if (qp->ibqp.qp_num) { - if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE)) { - dev->n_pkt_drops++; - goto bail; - } - if (unlikely(qkey != qp->qkey)) { - /* XXX OK to lose a count once in a while. */ - dev->qkey_violations++; - dev->n_pkt_drops++; - goto bail; - } - } else if (hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE) { - struct ib_smp *smp = (struct ib_smp *) data; - - if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - dev->n_pkt_drops++; - goto bail; - } - } - - /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - if (qp->ibqp.qp_num > 1 && - opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - if (header_in_data) { - wc.ex.imm_data = *(__be32 *) data; - data += sizeof(__be32); - } else - wc.ex.imm_data = ohdr->u.ud.imm_data; - wc.wc_flags = IB_WC_WITH_IMM; - hdrsize += sizeof(u32); - } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.ex.imm_data = 0; - wc.wc_flags = 0; - } else { - dev->n_pkt_drops++; - goto bail; - } - - /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - if (unlikely(tlen < (hdrsize + pad + 4))) { - /* Drop incomplete packets. */ - dev->n_pkt_drops++; - goto bail; - } - tlen -= hdrsize + pad + 4; - - /* Drop invalid MAD packets (see 13.5.3.1). */ - if (unlikely((qp->ibqp.qp_num == 0 && - (tlen != 256 || - (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) || - (qp->ibqp.qp_num == 1 && - (tlen != 256 || - (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) { - dev->n_pkt_drops++; - goto bail; - } - - /* - * A GRH is expected to precede the data even if not - * present on the wire. - */ - wc.byte_len = tlen + sizeof(struct ib_grh); - - /* - * Get the next work request entry to find where to put the data. - */ - if (qp->r_flags & IPATH_R_REUSE_SGE) - qp->r_flags &= ~IPATH_R_REUSE_SGE; - else if (!ipath_get_rwqe(qp, 0)) { - /* - * Count VL15 packets dropped due to no receive buffer. - * Otherwise, count them as buffer overruns since usually, - * the HW will be able to receive packets even if there are - * no QPs with posted receive buffers. - */ - if (qp->ibqp.qp_num == 0) - dev->n_vl15_dropped++; - else - dev->rcv_errors++; - goto bail; - } - /* Silently drop packets which are too big. */ - if (wc.byte_len > qp->r_len) { - qp->r_flags |= IPATH_R_REUSE_SGE; - dev->n_pkt_drops++; - goto bail; - } - if (has_grh) { - ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, - sizeof(struct ib_grh)); - wc.wc_flags |= IB_WC_GRH; - } else - ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); - ipath_copy_sge(&qp->r_sge, data, - wc.byte_len - sizeof(struct ib_grh)); - if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) - goto bail; - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; - wc.qp = &qp->ibqp; - wc.src_qp = src_qp; - /* XXX do we know which pkey matched? Only needed for GSI. */ - wc.pkey_index = 0; - wc.slid = be16_to_cpu(hdr->lrh[3]); - wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; - dlid = be16_to_cpu(hdr->lrh[1]); - /* - * Save the LMC lower bits if the destination LID is a unicast LID. - */ - wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 : - dlid & ((1 << dev->dd->ipath_lmc) - 1); - wc.port_num = 1; - /* Signal completion event if the solicited bit is set. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(1 << 23)) != 0); - -bail:; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_user_pages.c b/kernel/drivers/infiniband/hw/ipath/ipath_user_pages.c deleted file mode 100644 index 1da1252dc..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/mm.h> -#include <linux/device.h> -#include <linux/slab.h> -#include <linux/sched.h> - -#include "ipath_kernel.h" - -static void __ipath_release_user_pages(struct page **p, size_t num_pages, - int dirty) -{ - size_t i; - - for (i = 0; i < num_pages; i++) { - ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i, - (unsigned long) num_pages, p[i]); - if (dirty) - set_page_dirty_lock(p[i]); - put_page(p[i]); - } -} - -/* call with current->mm->mmap_sem held */ -static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p) -{ - unsigned long lock_limit; - size_t got; - int ret; - - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (num_pages > lock_limit) { - ret = -ENOMEM; - goto bail; - } - - ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n", - (unsigned long) num_pages, start_page); - - for (got = 0; got < num_pages; got += ret) { - ret = get_user_pages(current, current->mm, - start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, - p + got, NULL); - if (ret < 0) - goto bail_release; - } - - current->mm->pinned_vm += num_pages; - - ret = 0; - goto bail; - -bail_release: - __ipath_release_user_pages(p, got, 0); -bail: - return ret; -} - -/** - * ipath_map_page - a safety wrapper around pci_map_page() - * - * A dma_addr of all 0's is interpreted by the chip as "disabled". - * Unfortunately, it can also be a valid dma_addr returned on some - * architectures. - * - * The powerpc iommu assigns dma_addrs in ascending order, so we don't - * have to bother with retries or mapping a dummy page to insure we - * don't just get the same mapping again. - * - * I'm sure we won't be so lucky with other iommu's, so FIXME. - */ -dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - dma_addr_t phys; - - phys = pci_map_page(hwdev, page, offset, size, direction); - - if (phys == 0) { - pci_unmap_page(hwdev, phys, size, direction); - phys = pci_map_page(hwdev, page, offset, size, direction); - /* - * FIXME: If we get 0 again, we should keep this page, - * map another, then free the 0 page. - */ - } - - return phys; -} - -/** - * ipath_map_single - a safety wrapper around pci_map_single() - * - * Same idea as ipath_map_page(). - */ -dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size, - int direction) -{ - dma_addr_t phys; - - phys = pci_map_single(hwdev, ptr, size, direction); - - if (phys == 0) { - pci_unmap_single(hwdev, phys, size, direction); - phys = pci_map_single(hwdev, ptr, size, direction); - /* - * FIXME: If we get 0 again, we should keep this page, - * map another, then free the 0 page. - */ - } - - return phys; -} - -/** - * ipath_get_user_pages - lock user pages into memory - * @start_page: the start page - * @num_pages: the number of pages - * @p: the output page structures - * - * This function takes a given start page (page aligned user virtual - * address) and pins it and the following specified number of pages. For - * now, num_pages is always 1, but that will probably change at some point - * (because caller is doing expected sends on a single virtually contiguous - * buffer, so we can do all pages at once). - */ -int ipath_get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p) -{ - int ret; - - down_write(¤t->mm->mmap_sem); - - ret = __ipath_get_user_pages(start_page, num_pages, p); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - -void ipath_release_user_pages(struct page **p, size_t num_pages) -{ - down_write(¤t->mm->mmap_sem); - - __ipath_release_user_pages(p, num_pages, 1); - - current->mm->pinned_vm -= num_pages; - - up_write(¤t->mm->mmap_sem); -} - -struct ipath_user_pages_work { - struct work_struct work; - struct mm_struct *mm; - unsigned long num_pages; -}; - -static void user_pages_account(struct work_struct *_work) -{ - struct ipath_user_pages_work *work = - container_of(_work, struct ipath_user_pages_work, work); - - down_write(&work->mm->mmap_sem); - work->mm->pinned_vm -= work->num_pages; - up_write(&work->mm->mmap_sem); - mmput(work->mm); - kfree(work); -} - -void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) -{ - struct ipath_user_pages_work *work; - struct mm_struct *mm; - - __ipath_release_user_pages(p, num_pages, 1); - - mm = get_task_mm(current); - if (!mm) - return; - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) - goto bail_mm; - - INIT_WORK(&work->work, user_pages_account); - work->mm = mm; - work->num_pages = num_pages; - - queue_work(ib_wq, &work->work); - return; - -bail_mm: - mmput(mm); - return; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/kernel/drivers/infiniband/hw/ipath/ipath_user_sdma.c deleted file mode 100644 index cc04b7ba3..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ /dev/null @@ -1,875 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/mm.h> -#include <linux/types.h> -#include <linux/device.h> -#include <linux/dmapool.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/list.h> -#include <linux/highmem.h> -#include <linux/io.h> -#include <linux/uio.h> -#include <linux/rbtree.h> -#include <linux/spinlock.h> -#include <linux/delay.h> - -#include "ipath_kernel.h" -#include "ipath_user_sdma.h" - -/* minimum size of header */ -#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64 -/* expected size of headers (for dma_pool) */ -#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64 -/* length mask in PBC (lower 11 bits) */ -#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1) - -struct ipath_user_sdma_pkt { - u8 naddr; /* dimension of addr (1..3) ... */ - u32 counter; /* sdma pkts queued counter for this entry */ - u64 added; /* global descq number of entries */ - - struct { - u32 offset; /* offset for kvaddr, addr */ - u32 length; /* length in page */ - u8 put_page; /* should we put_page? */ - u8 dma_mapped; /* is page dma_mapped? */ - struct page *page; /* may be NULL (coherent mem) */ - void *kvaddr; /* FIXME: only for pio hack */ - dma_addr_t addr; - } addr[4]; /* max pages, any more and we coalesce */ - struct list_head list; /* list element */ -}; - -struct ipath_user_sdma_queue { - /* - * pkts sent to dma engine are queued on this - * list head. the type of the elements of this - * list are struct ipath_user_sdma_pkt... - */ - struct list_head sent; - - /* headers with expected length are allocated from here... */ - char header_cache_name[64]; - struct dma_pool *header_cache; - - /* packets are allocated from the slab cache... */ - char pkt_slab_name[64]; - struct kmem_cache *pkt_slab; - - /* as packets go on the queued queue, they are counted... */ - u32 counter; - u32 sent_counter; - - /* dma page table */ - struct rb_root dma_pages_root; - - /* protect everything above... */ - struct mutex lock; -}; - -struct ipath_user_sdma_queue * -ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport) -{ - struct ipath_user_sdma_queue *pq = - kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL); - - if (!pq) - goto done; - - pq->counter = 0; - pq->sent_counter = 0; - INIT_LIST_HEAD(&pq->sent); - - mutex_init(&pq->lock); - - snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), - "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport); - pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name, - sizeof(struct ipath_user_sdma_pkt), - 0, 0, NULL); - - if (!pq->pkt_slab) - goto err_kfree; - - snprintf(pq->header_cache_name, sizeof(pq->header_cache_name), - "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport); - pq->header_cache = dma_pool_create(pq->header_cache_name, - dev, - IPATH_USER_SDMA_EXP_HEADER_LENGTH, - 4, 0); - if (!pq->header_cache) - goto err_slab; - - pq->dma_pages_root = RB_ROOT; - - goto done; - -err_slab: - kmem_cache_destroy(pq->pkt_slab); -err_kfree: - kfree(pq); - pq = NULL; - -done: - return pq; -} - -static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt, - int i, size_t offset, size_t len, - int put_page, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) -{ - pkt->addr[i].offset = offset; - pkt->addr[i].length = len; - pkt->addr[i].put_page = put_page; - pkt->addr[i].dma_mapped = dma_mapped; - pkt->addr[i].page = page; - pkt->addr[i].kvaddr = kvaddr; - pkt->addr[i].addr = dma_addr; -} - -static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt, - u32 counter, size_t offset, - size_t len, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) -{ - pkt->naddr = 1; - pkt->counter = counter; - ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, - kvaddr, dma_addr); -} - -/* we've too many pages in the iovec, coalesce to a single page */ -static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, - struct ipath_user_sdma_pkt *pkt, - const struct iovec *iov, - unsigned long niov) { - int ret = 0; - struct page *page = alloc_page(GFP_KERNEL); - void *mpage_save; - char *mpage; - int i; - int len = 0; - dma_addr_t dma_addr; - - if (!page) { - ret = -ENOMEM; - goto done; - } - - mpage = kmap(page); - mpage_save = mpage; - for (i = 0; i < niov; i++) { - int cfur; - - cfur = copy_from_user(mpage, - iov[i].iov_base, iov[i].iov_len); - if (cfur) { - ret = -EFAULT; - goto free_unmap; - } - - mpage += iov[i].iov_len; - len += iov[i].iov_len; - } - - dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_unmap; - } - - ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, - dma_addr); - pkt->naddr = 2; - - goto done; - -free_unmap: - kunmap(page); - __free_page(page); -done: - return ret; -} - -/* how many pages in this iovec element? */ -static int ipath_user_sdma_num_pages(const struct iovec *iov) -{ - const unsigned long addr = (unsigned long) iov->iov_base; - const unsigned long len = iov->iov_len; - const unsigned long spage = addr & PAGE_MASK; - const unsigned long epage = (addr + len - 1) & PAGE_MASK; - - return 1 + ((epage - spage) >> PAGE_SHIFT); -} - -/* truncate length to page boundary */ -static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len) -{ - const unsigned long offset = addr & ~PAGE_MASK; - - return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; -} - -static void ipath_user_sdma_free_pkt_frag(struct device *dev, - struct ipath_user_sdma_queue *pq, - struct ipath_user_sdma_pkt *pkt, - int frag) -{ - const int i = frag; - - if (pkt->addr[i].page) { - if (pkt->addr[i].dma_mapped) - dma_unmap_page(dev, - pkt->addr[i].addr, - pkt->addr[i].length, - DMA_TO_DEVICE); - - if (pkt->addr[i].kvaddr) - kunmap(pkt->addr[i].page); - - if (pkt->addr[i].put_page) - put_page(pkt->addr[i].page); - else - __free_page(pkt->addr[i].page); - } else if (pkt->addr[i].kvaddr) - /* free coherent mem from cache... */ - dma_pool_free(pq->header_cache, - pkt->addr[i].kvaddr, pkt->addr[i].addr); -} - -/* return number of pages pinned... */ -static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, - struct ipath_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) -{ - struct page *pages[2]; - int j; - int ret; - - ret = get_user_pages_fast(addr, npages, 0, pages); - if (ret != npages) { - int i; - - for (i = 0; i < ret; i++) - put_page(pages[i]); - - ret = -ENOMEM; - goto done; - } - - for (j = 0; j < npages; j++) { - /* map the pages... */ - const int flen = - ipath_user_sdma_page_length(addr, tlen); - dma_addr_t dma_addr = - dma_map_page(&dd->pcidev->dev, - pages[j], 0, flen, DMA_TO_DEVICE); - unsigned long fofs = addr & ~PAGE_MASK; - - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto done; - } - - ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, - pages[j], kmap(pages[j]), - dma_addr); - - pkt->naddr++; - addr += flen; - tlen -= flen; - } - -done: - return ret; -} - -static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct ipath_user_sdma_pkt *pkt, - const struct iovec *iov, - unsigned long niov) -{ - int ret = 0; - unsigned long idx; - - for (idx = 0; idx < niov; idx++) { - const int npages = ipath_user_sdma_num_pages(iov + idx); - const unsigned long addr = (unsigned long) iov[idx].iov_base; - - ret = ipath_user_sdma_pin_pages(dd, pkt, - addr, iov[idx].iov_len, - npages); - if (ret < 0) - goto free_pkt; - } - - goto done; - -free_pkt: - for (idx = 0; idx < pkt->naddr; idx++) - ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); - -done: - return ret; -} - -static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct ipath_user_sdma_pkt *pkt, - const struct iovec *iov, - unsigned long niov, int npages) -{ - int ret = 0; - - if (npages >= ARRAY_SIZE(pkt->addr)) - ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov); - else - ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); - - return ret; -} - -/* free a packet list -- return counter value of last packet */ -static void ipath_user_sdma_free_pkt_list(struct device *dev, - struct ipath_user_sdma_queue *pq, - struct list_head *list) -{ - struct ipath_user_sdma_pkt *pkt, *pkt_next; - - list_for_each_entry_safe(pkt, pkt_next, list, list) { - int i; - - for (i = 0; i < pkt->naddr; i++) - ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i); - - kmem_cache_free(pq->pkt_slab, pkt); - } -} - -/* - * copy headers, coalesce etc -- pq->lock must be held - * - * we queue all the packets to list, returning the - * number of bytes total. list must be empty initially, - * as, if there is an error we clean it... - */ -static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct list_head *list, - const struct iovec *iov, - unsigned long niov, - int maxpkts) -{ - unsigned long idx = 0; - int ret = 0; - int npkts = 0; - struct page *page = NULL; - __le32 *pbc; - dma_addr_t dma_addr; - struct ipath_user_sdma_pkt *pkt = NULL; - size_t len; - size_t nw; - u32 counter = pq->counter; - int dma_mapped = 0; - - while (idx < niov && npkts < maxpkts) { - const unsigned long addr = (unsigned long) iov[idx].iov_base; - const unsigned long idx_save = idx; - unsigned pktnw; - unsigned pktnwc; - int nfrags = 0; - int npages = 0; - int cfur; - - dma_mapped = 0; - len = iov[idx].iov_len; - nw = len >> 2; - page = NULL; - - pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); - if (!pkt) { - ret = -ENOMEM; - goto free_list; - } - - if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH || - len > PAGE_SIZE || len & 3 || addr & 3) { - ret = -EINVAL; - goto free_pkt; - } - - if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH) - pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, - &dma_addr); - else - pbc = NULL; - - if (!pbc) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto free_pkt; - } - pbc = kmap(page); - } - - cfur = copy_from_user(pbc, iov[idx].iov_base, len); - if (cfur) { - ret = -EFAULT; - goto free_pbc; - } - - /* - * this assignment is a bit strange. it's because the - * the pbc counts the number of 32 bit words in the full - * packet _except_ the first word of the pbc itself... - */ - pktnwc = nw - 1; - - /* - * pktnw computation yields the number of 32 bit words - * that the caller has indicated in the PBC. note that - * this is one less than the total number of words that - * goes to the send DMA engine as the first 32 bit word - * of the PBC itself is not counted. Armed with this count, - * we can verify that the packet is consistent with the - * iovec lengths. - */ - pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK; - if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { - ret = -EINVAL; - goto free_pbc; - } - - - idx++; - while (pktnwc < pktnw && idx < niov) { - const size_t slen = iov[idx].iov_len; - const unsigned long faddr = - (unsigned long) iov[idx].iov_base; - - if (slen & 3 || faddr & 3 || !slen || - slen > PAGE_SIZE) { - ret = -EINVAL; - goto free_pbc; - } - - npages++; - if ((faddr & PAGE_MASK) != - ((faddr + slen - 1) & PAGE_MASK)) - npages++; - - pktnwc += slen >> 2; - idx++; - nfrags++; - } - - if (pktnwc != pktnw) { - ret = -EINVAL; - goto free_pbc; - } - - if (page) { - dma_addr = dma_map_page(&dd->pcidev->dev, - page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_pbc; - } - - dma_mapped = 1; - } - - ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, - page, pbc, dma_addr); - - if (nfrags) { - ret = ipath_user_sdma_init_payload(dd, pq, pkt, - iov + idx_save + 1, - nfrags, npages); - if (ret < 0) - goto free_pbc_dma; - } - - counter++; - npkts++; - - list_add_tail(&pkt->list, list); - } - - ret = idx; - goto done; - -free_pbc_dma: - if (dma_mapped) - dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); -free_pbc: - if (page) { - kunmap(page); - __free_page(page); - } else - dma_pool_free(pq->header_cache, pbc, dma_addr); -free_pkt: - kmem_cache_free(pq->pkt_slab, pkt); -free_list: - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); -done: - return ret; -} - -static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq, - u32 c) -{ - pq->sent_counter = c; -} - -/* try to clean out queue -- needs pq->lock */ -static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq) -{ - struct list_head free_list; - struct ipath_user_sdma_pkt *pkt; - struct ipath_user_sdma_pkt *pkt_prev; - int ret = 0; - - INIT_LIST_HEAD(&free_list); - - list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { - s64 descd = dd->ipath_sdma_descq_removed - pkt->added; - - if (descd < 0) - break; - - list_move_tail(&pkt->list, &free_list); - - /* one more packet cleaned */ - ret++; - } - - if (!list_empty(&free_list)) { - u32 counter; - - pkt = list_entry(free_list.prev, - struct ipath_user_sdma_pkt, list); - counter = pkt->counter; - - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); - ipath_user_sdma_set_complete_counter(pq, counter); - } - - return ret; -} - -void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq) -{ - if (!pq) - return; - - kmem_cache_destroy(pq->pkt_slab); - dma_pool_destroy(pq->header_cache); - kfree(pq); -} - -/* clean descriptor queue, returns > 0 if some elements cleaned */ -static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd) -{ - int ret; - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - ret = ipath_sdma_make_progress(dd); - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - return ret; -} - -/* we're in close, drain packets so that we can cleanup successfully... */ -void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq) -{ - int i; - - if (!pq) - return; - - for (i = 0; i < 100; i++) { - mutex_lock(&pq->lock); - if (list_empty(&pq->sent)) { - mutex_unlock(&pq->lock); - break; - } - ipath_user_sdma_hwqueue_clean(dd); - ipath_user_sdma_queue_clean(dd, pq); - mutex_unlock(&pq->lock); - msleep(10); - } - - if (!list_empty(&pq->sent)) { - struct list_head free_list; - - printk(KERN_INFO "drain: lists not empty: forcing!\n"); - INIT_LIST_HEAD(&free_list); - mutex_lock(&pq->lock); - list_splice_init(&pq->sent, &free_list); - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); - mutex_unlock(&pq->lock); - } -} - -static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd, - u64 addr, u64 dwlen, u64 dwoffset) -{ - return cpu_to_le64(/* SDmaPhyAddr[31:0] */ - ((addr & 0xfffffffcULL) << 32) | - /* SDmaGeneration[1:0] */ - ((dd->ipath_sdma_generation & 3ULL) << 30) | - /* SDmaDwordCount[10:0] */ - ((dwlen & 0x7ffULL) << 16) | - /* SDmaBufOffset[12:2] */ - (dwoffset & 0x7ffULL)); -} - -static inline __le64 ipath_sdma_make_first_desc0(__le64 descq) -{ - return descq | cpu_to_le64(1ULL << 12); -} - -static inline __le64 ipath_sdma_make_last_desc0(__le64 descq) -{ - /* last */ /* dma head */ - return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); -} - -static inline __le64 ipath_sdma_make_desc1(u64 addr) -{ - /* SDmaPhyAddr[47:32] */ - return cpu_to_le64(addr >> 32); -} - -static void ipath_user_sdma_send_frag(struct ipath_devdata *dd, - struct ipath_user_sdma_pkt *pkt, int idx, - unsigned ofs, u16 tail) -{ - const u64 addr = (u64) pkt->addr[idx].addr + - (u64) pkt->addr[idx].offset; - const u64 dwlen = (u64) pkt->addr[idx].length / 4; - __le64 *descqp; - __le64 descq0; - - descqp = &dd->ipath_sdma_descq[tail].qw[0]; - - descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs); - if (idx == 0) - descq0 = ipath_sdma_make_first_desc0(descq0); - if (idx == pkt->naddr - 1) - descq0 = ipath_sdma_make_last_desc0(descq0); - - descqp[0] = descq0; - descqp[1] = ipath_sdma_make_desc1(addr); -} - -/* pq->lock must be held, get packets on the wire... */ -static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - struct list_head *pktlist) -{ - int ret = 0; - unsigned long flags; - u16 tail; - - if (list_empty(pktlist)) - return 0; - - if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE))) - return -ECOMM; - - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - - if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) { - ret = -ECOMM; - goto unlock; - } - - tail = dd->ipath_sdma_descq_tail; - while (!list_empty(pktlist)) { - struct ipath_user_sdma_pkt *pkt = - list_entry(pktlist->next, struct ipath_user_sdma_pkt, - list); - int i; - unsigned ofs = 0; - u16 dtail = tail; - - if (pkt->naddr > ipath_sdma_descq_freecnt(dd)) - goto unlock_check_tail; - - for (i = 0; i < pkt->naddr; i++) { - ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail); - ofs += pkt->addr[i].length >> 2; - - if (++tail == dd->ipath_sdma_descq_cnt) { - tail = 0; - ++dd->ipath_sdma_generation; - } - } - - if ((ofs<<2) > dd->ipath_ibmaxlen) { - ipath_dbg("packet size %X > ibmax %X, fail\n", - ofs<<2, dd->ipath_ibmaxlen); - ret = -EMSGSIZE; - goto unlock; - } - - /* - * if the packet is >= 2KB mtu equivalent, we have to use - * the large buffers, and have to mark each descriptor as - * part of a large buffer packet. - */ - if (ofs >= IPATH_SMALLBUF_DWORDS) { - for (i = 0; i < pkt->naddr; i++) { - dd->ipath_sdma_descq[dtail].qw[0] |= - cpu_to_le64(1ULL << 14); - if (++dtail == dd->ipath_sdma_descq_cnt) - dtail = 0; - } - } - - dd->ipath_sdma_descq_added += pkt->naddr; - pkt->added = dd->ipath_sdma_descq_added; - list_move_tail(&pkt->list, &pq->sent); - ret++; - } - -unlock_check_tail: - /* advance the tail on the chip if necessary */ - if (dd->ipath_sdma_descq_tail != tail) { - wmb(); - ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail); - dd->ipath_sdma_descq_tail = tail; - } - -unlock: - spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - return ret; -} - -int ipath_user_sdma_writev(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - const struct iovec *iov, - unsigned long dim) -{ - int ret = 0; - struct list_head list; - int npkts = 0; - - INIT_LIST_HEAD(&list); - - mutex_lock(&pq->lock); - - if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) { - ipath_user_sdma_hwqueue_clean(dd); - ipath_user_sdma_queue_clean(dd, pq); - } - - while (dim) { - const int mxp = 8; - - ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); - if (ret <= 0) - goto done_unlock; - else { - dim -= ret; - iov += ret; - } - - /* force packets onto the sdma hw queue... */ - if (!list_empty(&list)) { - /* - * lazily clean hw queue. the 4 is a guess of about - * how many sdma descriptors a packet will take (it - * doesn't have to be perfect). - */ - if (ipath_sdma_descq_freecnt(dd) < ret * 4) { - ipath_user_sdma_hwqueue_clean(dd); - ipath_user_sdma_queue_clean(dd, pq); - } - - ret = ipath_user_sdma_push_pkts(dd, pq, &list); - if (ret < 0) - goto done_unlock; - else { - npkts += ret; - pq->counter += ret; - - if (!list_empty(&list)) - goto done_unlock; - } - } - } - -done_unlock: - if (!list_empty(&list)) - ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list); - mutex_unlock(&pq->lock); - - return (ret < 0) ? ret : npkts; -} - -int ipath_user_sdma_make_progress(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq) -{ - int ret = 0; - - mutex_lock(&pq->lock); - ipath_user_sdma_hwqueue_clean(dd); - ret = ipath_user_sdma_queue_clean(dd, pq); - mutex_unlock(&pq->lock); - - return ret; -} - -u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq) -{ - return pq->sent_counter; -} - -u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq) -{ - return pq->counter; -} - diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/kernel/drivers/infiniband/hw/ipath/ipath_user_sdma.h deleted file mode 100644 index fc76316c4..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/device.h> - -struct ipath_user_sdma_queue; - -struct ipath_user_sdma_queue * -ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); -void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq); - -int ipath_user_sdma_writev(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq, - const struct iovec *iov, - unsigned long dim); - -int ipath_user_sdma_make_progress(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq); - -void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, - struct ipath_user_sdma_queue *pq); - -u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq); -u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq); diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_verbs.c b/kernel/drivers/infiniband/hw/ipath/ipath_verbs.c deleted file mode 100644 index 44ea93904..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_verbs.c +++ /dev/null @@ -1,2342 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <rdma/ib_mad.h> -#include <rdma/ib_user_verbs.h> -#include <linux/io.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/utsname.h> -#include <linux/rculist.h> - -#include "ipath_kernel.h" -#include "ipath_verbs.h" -#include "ipath_common.h" - -static unsigned int ib_ipath_qp_table_size = 251; -module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); -MODULE_PARM_DESC(qp_table_size, "QP table size"); - -unsigned int ib_ipath_lkey_table_size = 12; -module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, - S_IRUGO); -MODULE_PARM_DESC(lkey_table_size, - "LKEY table size in bits (2^n, 1 <= n <= 23)"); - -static unsigned int ib_ipath_max_pds = 0xFFFF; -module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_pds, - "Maximum number of protection domains to support"); - -static unsigned int ib_ipath_max_ahs = 0xFFFF; -module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); - -unsigned int ib_ipath_max_cqes = 0x2FFFF; -module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_cqes, - "Maximum number of completion queue entries to support"); - -unsigned int ib_ipath_max_cqs = 0x1FFFF; -module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); - -unsigned int ib_ipath_max_qp_wrs = 0x3FFF; -module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, - S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); - -unsigned int ib_ipath_max_qps = 16384; -module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); - -unsigned int ib_ipath_max_sges = 0x60; -module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); - -unsigned int ib_ipath_max_mcast_grps = 16384; -module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint, - S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_mcast_grps, - "Maximum number of multicast groups to support"); - -unsigned int ib_ipath_max_mcast_qp_attached = 16; -module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached, - uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_mcast_qp_attached, - "Maximum number of attached QPs to support"); - -unsigned int ib_ipath_max_srqs = 1024; -module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); - -unsigned int ib_ipath_max_srq_sges = 128; -module_param_named(max_srq_sges, ib_ipath_max_srq_sges, - uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); - -unsigned int ib_ipath_max_srq_wrs = 0x1FFFF; -module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, - uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); - -static unsigned int ib_ipath_disable_sma; -module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(disable_sma, "Disable the SMA"); - -/* - * Note that it is OK to post send work requests in the SQE and ERR - * states; ipath_do_send() will process them and generate error - * completions as per IB 1.2 C10-96. - */ -const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = 0, - [IB_QPS_INIT] = IPATH_POST_RECV_OK, - [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, - [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | - IPATH_PROCESS_NEXT_SEND_OK, - [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, - [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | - IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, -}; - -struct ipath_ucontext { - struct ib_ucontext ibucontext; -}; - -static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct ipath_ucontext, ibucontext); -} - -/* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_ipath_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD -}; - -/* - * System image GUID. - */ -static __be64 sys_image_guid; - -/** - * ipath_copy_sge - copy data to SGE memory - * @ss: the SGE state - * @data: the data to copy - * @length: the length of the data - */ -void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - memcpy(sge->vaddr, data, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - data += len; - length -= len; - } -} - -/** - * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func - * @ss: the SGE state - * @length: the number of bytes to skip - */ -void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - length -= len; - } -} - -/* - * Count the number of DMA descriptors needed to send length bytes of data. - * Don't modify the ipath_sge_state to get the count. - * Return zero if any of the segments is not aligned. - */ -static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sg_list = ss->sg_list; - struct ipath_sge sge = ss->sge; - u8 num_sge = ss->num_sge; - u32 ndesc = 1; /* count the header */ - - while (length) { - u32 len = sge.length; - - if (len > length) - len = length; - if (len > sge.sge_length) - len = sge.sge_length; - BUG_ON(len == 0); - if (((long) sge.vaddr & (sizeof(u32) - 1)) || - (len != length && (len & (sizeof(u32) - 1)))) { - ndesc = 0; - break; - } - ndesc++; - sge.vaddr += len; - sge.length -= len; - sge.sge_length -= len; - if (sge.sge_length == 0) { - if (--num_sge) - sge = *sg_list++; - } else if (sge.length == 0 && sge.mr != NULL) { - if (++sge.n >= IPATH_SEGSZ) { - if (++sge.m >= sge.mr->mapsz) - break; - sge.n = 0; - } - sge.vaddr = - sge.mr->map[sge.m]->segs[sge.n].vaddr; - sge.length = - sge.mr->map[sge.m]->segs[sge.n].length; - } - length -= len; - } - return ndesc; -} - -/* - * Copy from the SGEs to the data buffer. - */ -static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, - u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - while (length) { - u32 len = sge->length; - - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; - BUG_ON(len == 0); - memcpy(data, sge->vaddr, len); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - data += len; - length -= len; - } -} - -/** - * ipath_post_one_send - post one RC, UC, or UD send work request - * @qp: the QP to post on - * @wr: the work request to send - */ -static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct ipath_swqe *wqe; - u32 next; - int i; - int j; - int acc; - int ret; - unsigned long flags; - struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; - - spin_lock_irqsave(&qp->s_lock, flags); - - if (qp->ibqp.qp_type != IB_QPT_SMI && - !(dd->ipath_flags & IPATH_LINKACTIVE)) { - ret = -ENETDOWN; - goto bail; - } - - /* Check that state is OK to post send. */ - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) - goto bail_inval; - - /* IB spec says that num_sge == 0 is OK. */ - if (wr->num_sge > qp->s_max_sge) - goto bail_inval; - - /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. - * Make sure buffer is large enough to hold the result for atomics. - */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) - goto bail_inval; - } else if (qp->ibqp.qp_type == IB_QPT_UD) { - /* Check UD opcode */ - if (wr->opcode != IB_WR_SEND && - wr->opcode != IB_WR_SEND_WITH_IMM) - goto bail_inval; - /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) - goto bail_inval; - } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) - goto bail_inval; - else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) - goto bail_inval; - else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) - goto bail_inval; - - next = qp->s_head + 1; - if (next >= qp->s_size) - next = 0; - if (next == qp->s_last) { - ret = -ENOMEM; - goto bail; - } - - wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; - wqe->length = 0; - if (wr->num_sge) { - acc = wr->opcode >= IB_WR_RDMA_READ ? - IB_ACCESS_LOCAL_WRITE : 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - u32 length = wr->sg_list[i].length; - int ok; - - if (length == 0) - continue; - ok = ipath_lkey_ok(qp, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) - goto bail_inval; - wqe->length += length; - j++; - } - wqe->wr.num_sge = j; - } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) - goto bail_inval; - } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) - goto bail_inval; - wqe->ssn = qp->s_ssn++; - qp->s_head = next; - - ret = 0; - goto bail; - -bail_inval: - ret = -EINVAL; -bail: - spin_unlock_irqrestore(&qp->s_lock, flags); - return ret; -} - -/** - * ipath_post_send - post a send on a QP - * @ibqp: the QP to post the send on - * @wr: the list of work requests to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. - */ -static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) -{ - struct ipath_qp *qp = to_iqp(ibqp); - int err = 0; - - for (; wr; wr = wr->next) { - err = ipath_post_one_send(qp, wr); - if (err) { - *bad_wr = wr; - goto bail; - } - } - - /* Try to do the send work in the caller's context. */ - ipath_do_send((unsigned long) qp); - -bail: - return err; -} - -/** - * ipath_post_receive - post a receive on a QP - * @ibqp: the QP to post the receive on - * @wr: the WR to post - * @bad_wr: the first bad WR is put here - * - * This may be called from interrupt context. - */ -static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_rwq *wq = qp->r_rq.wq; - unsigned long flags; - int ret; - - /* Check that state is OK to post receive. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - for (; wr; wr = wr->next) { - struct ipath_rwqe *wqe; - u32 next; - int i; - - if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - - spin_lock_irqsave(&qp->r_rq.lock, flags); - next = wq->head + 1; - if (next >= qp->r_rq.size) - next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - *bad_wr = wr; - ret = -ENOMEM; - goto bail; - } - - wqe = get_rwqe_ptr(&qp->r_rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&qp->r_rq.lock, flags); - } - ret = 0; - -bail: - return ret; -} - -/** - * ipath_qp_rcv - processing an incoming packet on a QP - * @dev: the device the packet came on - * @hdr: the packet header - * @has_grh: true if the packet has a GRH - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on - * - * This is called from ipath_ib_rcv() to process an incoming packet - * for the given QP. - * Called at interrupt level. - */ -static void ipath_qp_rcv(struct ipath_ibdev *dev, - struct ipath_ib_header *hdr, int has_grh, - void *data, u32 tlen, struct ipath_qp *qp) -{ - /* Check for valid receive state. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { - dev->n_pkt_drops++; - return; - } - - switch (qp->ibqp.qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - if (ib_ipath_disable_sma) - break; - /* FALLTHROUGH */ - case IB_QPT_UD: - ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); - break; - - case IB_QPT_RC: - ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp); - break; - - case IB_QPT_UC: - ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp); - break; - - default: - break; - } -} - -/** - * ipath_ib_rcv - process an incoming packet - * @arg: the device pointer - * @rhdr: the header of the packet - * @data: the packet data - * @tlen: the packet length - * - * This is called from ipath_kreceive() to process an incoming packet at - * interrupt level. Tlen is the length of the header + data + CRC in bytes. - */ -void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, - u32 tlen) -{ - struct ipath_ib_header *hdr = rhdr; - struct ipath_other_headers *ohdr; - struct ipath_qp *qp; - u32 qp_num; - int lnh; - u8 opcode; - u16 lid; - - if (unlikely(dev == NULL)) - goto bail; - - if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */ - dev->rcv_errors++; - goto bail; - } - - /* Check for a valid destination LID (see ch. 7.11.1). */ - lid = be16_to_cpu(hdr->lrh[1]); - if (lid < IPATH_MULTICAST_LID_BASE) { - lid &= ~((1 << dev->dd->ipath_lmc) - 1); - if (unlikely(lid != dev->dd->ipath_lid)) { - dev->rcv_errors++; - goto bail; - } - } - - /* Check for GRH */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == IPATH_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == IPATH_LRH_GRH) - ohdr = &hdr->u.l.oth; - else { - dev->rcv_errors++; - goto bail; - } - - opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; - dev->opstats[opcode].n_bytes += tlen; - dev->opstats[opcode].n_packets++; - - /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK; - if (qp_num == IPATH_MULTICAST_QPN) { - struct ipath_mcast *mcast; - struct ipath_mcast_qp *p; - - if (lnh != IPATH_LRH_GRH) { - dev->n_pkt_drops++; - goto bail; - } - mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); - if (mcast == NULL) { - dev->n_pkt_drops++; - goto bail; - } - dev->n_multicast_rcv++; - list_for_each_entry_rcu(p, &mcast->qp_list, list) - ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); - /* - * Notify ipath_multicast_detach() if it is waiting for us - * to finish. - */ - if (atomic_dec_return(&mcast->refcount) <= 1) - wake_up(&mcast->wait); - } else { - qp = ipath_lookup_qpn(&dev->qp_table, qp_num); - if (qp) { - dev->n_unicast_rcv++; - ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, - tlen, qp); - /* - * Notify ipath_destroy_qp() if it is waiting - * for us to finish. - */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } else - dev->n_pkt_drops++; - } - -bail:; -} - -/** - * ipath_ib_timer - verbs timer - * @arg: the device pointer - * - * This is called from ipath_do_rcv_timer() at interrupt level to check for - * QPs which need retransmits and to collect performance numbers. - */ -static void ipath_ib_timer(struct ipath_ibdev *dev) -{ - struct ipath_qp *resend = NULL; - struct ipath_qp *rnr = NULL; - struct list_head *last; - struct ipath_qp *qp; - unsigned long flags; - - if (dev == NULL) - return; - - spin_lock_irqsave(&dev->pending_lock, flags); - /* Start filling the next pending queue. */ - if (++dev->pending_index >= ARRAY_SIZE(dev->pending)) - dev->pending_index = 0; - /* Save any requests still in the new queue, they have timed out. */ - last = &dev->pending[dev->pending_index]; - while (!list_empty(last)) { - qp = list_entry(last->next, struct ipath_qp, timerwait); - list_del_init(&qp->timerwait); - qp->timer_next = resend; - resend = qp; - atomic_inc(&qp->refcount); - } - last = &dev->rnrwait; - if (!list_empty(last)) { - qp = list_entry(last->next, struct ipath_qp, timerwait); - if (--qp->s_rnr_timeout == 0) { - do { - list_del_init(&qp->timerwait); - qp->timer_next = rnr; - rnr = qp; - atomic_inc(&qp->refcount); - if (list_empty(last)) - break; - qp = list_entry(last->next, struct ipath_qp, - timerwait); - } while (qp->s_rnr_timeout == 0); - } - } - /* - * We should only be in the started state if pma_sample_start != 0 - */ - if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && - --dev->pma_sample_start == 0) { - dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; - ipath_snapshot_counters(dev->dd, &dev->ipath_sword, - &dev->ipath_rword, - &dev->ipath_spkts, - &dev->ipath_rpkts, - &dev->ipath_xmit_wait); - } - if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { - if (dev->pma_sample_interval == 0) { - u64 ta, tb, tc, td, te; - - dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; - ipath_snapshot_counters(dev->dd, &ta, &tb, - &tc, &td, &te); - - dev->ipath_sword = ta - dev->ipath_sword; - dev->ipath_rword = tb - dev->ipath_rword; - dev->ipath_spkts = tc - dev->ipath_spkts; - dev->ipath_rpkts = td - dev->ipath_rpkts; - dev->ipath_xmit_wait = te - dev->ipath_xmit_wait; - } - else - dev->pma_sample_interval--; - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - - /* XXX What if timer fires again while this is running? */ - while (resend != NULL) { - qp = resend; - resend = qp->timer_next; - - spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && - ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { - dev->n_timeouts++; - ipath_restart_rc(qp, qp->s_last_psn + 1); - } - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } - while (rnr != NULL) { - qp = rnr; - rnr = qp->timer_next; - - spin_lock_irqsave(&qp->s_lock, flags); - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} - -static void update_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - sge->vaddr += length; - sge->length -= length; - sge->sge_length -= length; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - return; - sge->n = 0; - } - sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = sge->mr->map[sge->m]->segs[sge->n].length; - } -} - -#ifdef __LITTLE_ENDIAN -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); - data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#else -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); - data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#endif - -static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, - u32 length, unsigned flush_wc) -{ - u32 extra = 0; - u32 data = 0; - u32 last; - - while (1) { - u32 len = ss->sge.length; - u32 off; - - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; - BUG_ON(len == 0); - /* If the source address is not aligned, try to align it. */ - off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); - if (off) { - u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & - ~(sizeof(u32) - 1)); - u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); - u32 y; - - y = sizeof(u32) - off; - if (len > y) - len = y; - if (len + extra >= sizeof(u32)) { - data |= set_upper_bits(v, extra * - BITS_PER_BYTE); - len = sizeof(u32) - extra; - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, len, extra); - if (len == length) { - last = data; - break; - } - extra += len; - } - } else if (extra) { - /* Source address is aligned. */ - u32 *addr = (u32 *) ss->sge.vaddr; - int shift = extra * BITS_PER_BYTE; - int ushift = 32 - shift; - u32 l = len; - - while (l >= sizeof(u32)) { - u32 v = *addr; - - data |= set_upper_bits(v, shift); - __raw_writel(data, piobuf); - data = get_upper_bits(v, ushift); - piobuf++; - addr++; - l -= sizeof(u32); - } - /* - * We still have 'extra' number of bytes leftover. - */ - if (l) { - u32 v = *addr; - - if (l + extra >= sizeof(u32)) { - data |= set_upper_bits(v, shift); - len -= l + extra - sizeof(u32); - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, l, - extra); - if (len == length) { - last = data; - break; - } - extra += l; - } - } else if (len == length) { - last = data; - break; - } - } else if (len == length) { - u32 w; - - /* - * Need to round up for the last dword in the - * packet. - */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); - piobuf += w - 1; - last = ((u32 *) ss->sge.vaddr)[w - 1]; - break; - } else { - u32 w = len >> 2; - - __iowrite32_copy(piobuf, ss->sge.vaddr, w); - piobuf += w; - - extra = len & (sizeof(u32) - 1); - if (extra) { - u32 v = ((u32 *) ss->sge.vaddr)[w]; - - /* Clear unused upper bytes */ - data = clear_upper_bytes(v, extra, 0); - } - } - update_sge(ss, len); - length -= len; - } - /* Update address before sending packet. */ - update_sge(ss, length); - if (flush_wc) { - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(last, piobuf); - /* be sure trigger word is written */ - ipath_flush_wc(); - } else - __raw_writel(last, piobuf); -} - -/* - * Convert IB rate to delay multiplier. - */ -unsigned ipath_ib_rate_to_mult(enum ib_rate rate) -{ - switch (rate) { - case IB_RATE_2_5_GBPS: return 8; - case IB_RATE_5_GBPS: return 4; - case IB_RATE_10_GBPS: return 2; - case IB_RATE_20_GBPS: return 1; - default: return 0; - } -} - -/* - * Convert delay multiplier to IB rate - */ -static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) -{ - switch (mult) { - case 8: return IB_RATE_2_5_GBPS; - case 4: return IB_RATE_5_GBPS; - case 2: return IB_RATE_10_GBPS; - case 1: return IB_RATE_20_GBPS; - default: return IB_RATE_PORT_CURRENT; - } -} - -static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev) -{ - struct ipath_verbs_txreq *tx = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&dev->txreq_free)) { - struct list_head *l = dev->txreq_free.next; - - list_del(l); - tx = list_entry(l, struct ipath_verbs_txreq, txreq.list); - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - return tx; -} - -static inline void put_txreq(struct ipath_ibdev *dev, - struct ipath_verbs_txreq *tx) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->pending_lock, flags); - list_add(&tx->txreq.list, &dev->txreq_free); - spin_unlock_irqrestore(&dev->pending_lock, flags); -} - -static void sdma_complete(void *cookie, int status) -{ - struct ipath_verbs_txreq *tx = cookie; - struct ipath_qp *qp = tx->qp; - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - - if (atomic_dec_and_test(&qp->s_dma_busy)) { - spin_lock_irqsave(&qp->s_lock, flags); - if (tx->wqe) - ipath_send_complete(qp, tx->wqe, ibs); - if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && - qp->s_last != qp->s_head) || - (qp->s_flags & IPATH_S_WAIT_DMA)) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - wake_up(&qp->wait_dma); - } else if (tx->wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - ipath_send_complete(qp, tx->wqe, ibs); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - - if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) - kfree(tx->txreq.map_addr); - put_txreq(dev, tx); - - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); -} - -static void decrement_dma_busy(struct ipath_qp *qp) -{ - unsigned long flags; - - if (atomic_dec_and_test(&qp->s_dma_busy)) { - spin_lock_irqsave(&qp->s_lock, flags); - if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && - qp->s_last != qp->s_head) || - (qp->s_flags & IPATH_S_WAIT_DMA)) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - wake_up(&qp->wait_dma); - } -} - -/* - * Compute the number of clock cycles of delay before sending the next packet. - * The multipliers reflect the number of clocks for the fastest rate so - * one tick at 4xDDR is 8 ticks at 1xSDR. - * If the destination port will take longer to receive a packet than - * the outgoing link can send it, we need to delay sending the next packet - * by the difference in time it takes the receiver to receive and the sender - * to send this packet. - * Note that this delay is always correct for UC and RC but not always - * optimal for UD. For UD, the destination HCA can be different for each - * packet, in which case, we could send packets to a different destination - * while "waiting" for the delay. The overhead for doing this without - * HW support is more than just paying the cost of delaying some packets - * unnecessarily. - */ -static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult) -{ - return (rcv_mult > snd_mult) ? - (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; -} - -static int ipath_verbs_send_dma(struct ipath_qp *qp, - struct ipath_ib_header *hdr, u32 hdrwords, - struct ipath_sge_state *ss, u32 len, - u32 plen, u32 dwords) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_devdata *dd = dev->dd; - struct ipath_verbs_txreq *tx; - u32 *piobuf; - u32 control; - u32 ndesc; - int ret; - - tx = qp->s_tx; - if (tx) { - qp->s_tx = NULL; - /* resend previously constructed packet */ - atomic_inc(&qp->s_dma_busy); - ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); - if (ret) { - qp->s_tx = tx; - decrement_dma_busy(qp); - } - goto bail; - } - - tx = get_txreq(dev); - if (!tx) { - ret = -EBUSY; - goto bail; - } - - /* - * Get the saved delay count we computed for the previous packet - * and save the delay count for this packet to be used next time - * we get here. - */ - control = qp->s_pkt_delay; - qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); - - tx->qp = qp; - atomic_inc(&qp->refcount); - tx->wqe = qp->s_wqe; - tx->txreq.callback = sdma_complete; - tx->txreq.callback_cookie = tx; - tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | - IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC; - if (plen + 1 >= IPATH_SMALLBUF_DWORDS) - tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF; - - /* VL15 packets bypass credit check */ - if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) { - control |= 1ULL << 31; - tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15; - } - - if (len) { - /* - * Don't try to DMA if it takes more descriptors than - * the queue holds. - */ - ndesc = ipath_count_sge(ss, len); - if (ndesc >= dd->ipath_sdma_descq_cnt) - ndesc = 0; - } else - ndesc = 1; - if (ndesc) { - tx->hdr.pbc[0] = cpu_to_le32(plen); - tx->hdr.pbc[1] = cpu_to_le32(control); - memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); - tx->txreq.sg_count = ndesc; - tx->map_len = (hdrwords + 2) << 2; - tx->txreq.map_addr = &tx->hdr; - atomic_inc(&qp->s_dma_busy); - ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); - if (ret) { - /* save ss and length in dwords */ - tx->ss = ss; - tx->len = dwords; - qp->s_tx = tx; - decrement_dma_busy(qp); - } - goto bail; - } - - /* Allocate a buffer and copy the header and payload to it. */ - tx->map_len = (plen + 1) << 2; - piobuf = kmalloc(tx->map_len, GFP_ATOMIC); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto err_tx; - } - tx->txreq.map_addr = piobuf; - tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; - tx->txreq.sg_count = 1; - - *piobuf++ = (__force u32) cpu_to_le32(plen); - *piobuf++ = (__force u32) cpu_to_le32(control); - memcpy(piobuf, hdr, hdrwords << 2); - ipath_copy_from_sge(piobuf + hdrwords, ss, len); - - atomic_inc(&qp->s_dma_busy); - ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); - /* - * If we couldn't queue the DMA request, save the info - * and try again later rather than destroying the - * buffer and undoing the side effects of the copy. - */ - if (ret) { - tx->ss = NULL; - tx->len = 0; - qp->s_tx = tx; - decrement_dma_busy(qp); - } - dev->n_unaligned++; - goto bail; - -err_tx: - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - put_txreq(dev, tx); -bail: - return ret; -} - -static int ipath_verbs_send_pio(struct ipath_qp *qp, - struct ipath_ib_header *ibhdr, u32 hdrwords, - struct ipath_sge_state *ss, u32 len, - u32 plen, u32 dwords) -{ - struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; - u32 *hdr = (u32 *) ibhdr; - u32 __iomem *piobuf; - unsigned flush_wc; - u32 control; - int ret; - unsigned long flags; - - piobuf = ipath_getpiobuf(dd, plen, NULL); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto bail; - } - - /* - * Get the saved delay count we computed for the previous packet - * and save the delay count for this packet to be used next time - * we get here. - */ - control = qp->s_pkt_delay; - qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); - - /* VL15 packets bypass credit check */ - if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15) - control |= 1ULL << 31; - - /* - * Write the length to the control qword plus any needed flags. - * We have to flush after the PBC for correctness on some cpus - * or WC buffer can be written out of order. - */ - writeq(((u64) control << 32) | plen, piobuf); - piobuf += 2; - - flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; - if (len == 0) { - /* - * If there is just the header portion, must flush before - * writing last word of header for correctness, and after - * the last header word (trigger word). - */ - if (flush_wc) { - ipath_flush_wc(); - __iowrite32_copy(piobuf, hdr, hdrwords - 1); - ipath_flush_wc(); - __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); - ipath_flush_wc(); - } else - __iowrite32_copy(piobuf, hdr, hdrwords); - goto done; - } - - if (flush_wc) - ipath_flush_wc(); - __iowrite32_copy(piobuf, hdr, hdrwords); - piobuf += hdrwords; - - /* The common case is aligned and contained in one segment. */ - if (likely(ss->num_sge == 1 && len <= ss->sge.length && - !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { - u32 *addr = (u32 *) ss->sge.vaddr; - - /* Update address before sending packet. */ - update_sge(ss, len); - if (flush_wc) { - __iowrite32_copy(piobuf, addr, dwords - 1); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(addr[dwords - 1], piobuf + dwords - 1); - /* be sure trigger word is written */ - ipath_flush_wc(); - } else - __iowrite32_copy(piobuf, addr, dwords); - goto done; - } - copy_io(piobuf, ss, len, flush_wc); -done: - if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - ret = 0; -bail: - return ret; -} - -/** - * ipath_verbs_send - send a packet - * @qp: the QP to send on - * @hdr: the packet header - * @hdrwords: the number of 32-bit words in the header - * @ss: the SGE to send - * @len: the length of the packet in bytes - */ -int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, - u32 hdrwords, struct ipath_sge_state *ss, u32 len) -{ - struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; - u32 plen; - int ret; - u32 dwords = (len + 3) >> 2; - - /* - * Calculate the send buffer trigger address. - * The +1 counts for the pbc control dword following the pbc length. - */ - plen = hdrwords + dwords + 1; - - /* - * VL15 packets (IB_QPT_SMI) will always use PIO, so we - * can defer SDMA restart until link goes ACTIVE without - * worrying about just how we got there. - */ - if (qp->ibqp.qp_type == IB_QPT_SMI || - !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, - plen, dwords); - else - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, - plen, dwords); - - return ret; -} - -int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -EINVAL; - goto bail; - } - *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_get_counters - get various chip counters - * @dd: the infinipath device - * @cntrs: counters are placed here - * - * Return the counters needed by recv_pma_get_portcounters(). - */ -int ipath_get_counters(struct ipath_devdata *dd, - struct ipath_verbs_counters *cntrs) -{ - struct ipath_cregs const *crp = dd->ipath_cregs; - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -EINVAL; - goto bail; - } - cntrs->symbol_error_counter = - ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt); - cntrs->link_error_recovery_counter = - ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt); - /* - * The link downed counter counts when the other side downs the - * connection. We add in the number of times we downed the link - * due to local link integrity errors to compensate. - */ - cntrs->link_downed_counter = - ipath_snap_cntr(dd, crp->cr_iblinkdowncnt); - cntrs->port_rcv_errors = - ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) + - ipath_snap_cntr(dd, crp->cr_rcvovflcnt) + - ipath_snap_cntr(dd, crp->cr_portovflcnt) + - ipath_snap_cntr(dd, crp->cr_err_rlencnt) + - ipath_snap_cntr(dd, crp->cr_invalidrlencnt) + - ipath_snap_cntr(dd, crp->cr_errlinkcnt) + - ipath_snap_cntr(dd, crp->cr_erricrccnt) + - ipath_snap_cntr(dd, crp->cr_errvcrccnt) + - ipath_snap_cntr(dd, crp->cr_errlpcrccnt) + - ipath_snap_cntr(dd, crp->cr_badformatcnt) + - dd->ipath_rxfc_unsupvl_errs; - if (crp->cr_rxotherlocalphyerrcnt) - cntrs->port_rcv_errors += - ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt); - if (crp->cr_rxvlerrcnt) - cntrs->port_rcv_errors += - ipath_snap_cntr(dd, crp->cr_rxvlerrcnt); - cntrs->port_rcv_remphys_errors = - ipath_snap_cntr(dd, crp->cr_rcvebpcnt); - cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt); - cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt); - cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt); - cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt); - cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = - crp->cr_locallinkintegrityerrcnt ? - ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) : - ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? - dd->ipath_lli_errs : dd->ipath_lli_errors); - cntrs->excessive_buffer_overrun_errors = - crp->cr_excessbufferovflcnt ? - ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) : - dd->ipath_overrun_thresh_errs; - cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ? - ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0; - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_ib_piobufavail - callback when a PIO buffer is available - * @arg: the device pointer - * - * This is called from ipath_intr() at interrupt level when a PIO buffer is - * available after ipath_verbs_send() returned an error that no buffers were - * available. Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just restart the send tasklet and - * return zero). - */ -int ipath_ib_piobufavail(struct ipath_ibdev *dev) -{ - struct list_head *list; - struct ipath_qp *qplist; - struct ipath_qp *qp; - unsigned long flags; - - if (dev == NULL) - goto bail; - - list = &dev->piowait; - qplist = NULL; - - spin_lock_irqsave(&dev->pending_lock, flags); - while (!list_empty(list)) { - qp = list_entry(list->next, struct ipath_qp, piowait); - list_del_init(&qp->piowait); - qp->pio_next = qplist; - qplist = qp; - atomic_inc(&qp->refcount); - } - spin_unlock_irqrestore(&dev->pending_lock, flags); - - while (qplist != NULL) { - qp = qplist; - qplist = qp->pio_next; - - spin_lock_irqsave(&qp->s_lock, flags); - if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) - ipath_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } - -bail: - return 0; -} - -static int ipath_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - - memset(props, 0, sizeof(*props)); - - props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | - IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; - props->page_size_cap = PAGE_SIZE; - props->vendor_id = - IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; - props->vendor_part_id = dev->dd->ipath_deviceid; - props->hw_ver = dev->dd->ipath_pcirev; - - props->sys_image_guid = dev->sys_image_guid; - - props->max_mr_size = ~0ull; - props->max_qp = ib_ipath_max_qps; - props->max_qp_wr = ib_ipath_max_qp_wrs; - props->max_sge = ib_ipath_max_sges; - props->max_cq = ib_ipath_max_cqs; - props->max_ah = ib_ipath_max_ahs; - props->max_cqe = ib_ipath_max_cqes; - props->max_mr = dev->lk_table.max; - props->max_fmr = dev->lk_table.max; - props->max_map_per_fmr = 32767; - props->max_pd = ib_ipath_max_pds; - props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; - props->max_qp_init_rd_atom = 255; - /* props->max_res_rd_atom */ - props->max_srq = ib_ipath_max_srqs; - props->max_srq_wr = ib_ipath_max_srq_wrs; - props->max_srq_sge = ib_ipath_max_srq_sges; - /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_GLOB; - props->max_pkeys = ipath_get_npkeys(dev->dd); - props->max_mcast_grp = ib_ipath_max_mcast_grps; - props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; - props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * - props->max_mcast_grp; - - return 0; -} - -const u8 ipath_cvt_physportstate[32] = { - [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, - [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, - [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, - [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, - [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, - [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, - [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = - IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = - IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = - IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, - [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = - IB_PHYSPORTSTATE_LINK_ERR_RECOVER, - [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = - IB_PHYSPORTSTATE_LINK_ERR_RECOVER, - [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = - IB_PHYSPORTSTATE_LINK_ERR_RECOVER, - [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, - [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN -}; - -u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) -{ - return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); -} - -static int ipath_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_devdata *dd = dev->dd; - enum ib_mtu mtu; - u16 lid = dd->ipath_lid; - u64 ibcstat; - - memset(props, 0, sizeof(*props)); - props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); - props->lmc = dd->ipath_lmc; - props->sm_lid = dev->sm_lid; - props->sm_sl = dev->sm_sl; - ibcstat = dd->ipath_lastibcstat; - /* map LinkState to IB portinfo values. */ - props->state = ipath_ib_linkstate(dd, ibcstat) + 1; - - /* See phys_state_show() */ - props->phys_state = /* MEA: assumes shift == 0 */ - ipath_cvt_physportstate[dd->ipath_lastibcstat & - dd->ibcs_lts_mask]; - props->port_cap_flags = dev->port_cap_flags; - props->gid_tbl_len = 1; - props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = ipath_get_npkeys(dd); - props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) - - dev->z_pkey_violations; - props->qkey_viol_cntr = dev->qkey_violations; - props->active_width = dd->ipath_link_width_active; - /* See rate_show() */ - props->active_speed = dd->ipath_link_speed_active; - props->max_vl_num = 1; /* VLCap = VL0 */ - props->init_type_reply = 0; - - props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048; - switch (dd->ipath_ibmtu) { - case 4096: - mtu = IB_MTU_4096; - break; - case 2048: - mtu = IB_MTU_2048; - break; - case 1024: - mtu = IB_MTU_1024; - break; - case 512: - mtu = IB_MTU_512; - break; - case 256: - mtu = IB_MTU_256; - break; - default: - mtu = IB_MTU_2048; - } - props->active_mtu = mtu; - props->subnet_timeout = dev->subnet_timeout; - - return 0; -} - -static int ipath_modify_device(struct ib_device *device, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - int ret; - - if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | - IB_DEVICE_MODIFY_NODE_DESC)) { - ret = -EOPNOTSUPP; - goto bail; - } - - if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) - memcpy(device->node_desc, device_modify->node_desc, 64); - - if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) - to_idev(device)->sys_image_guid = - cpu_to_be64(device_modify->sys_image_guid); - - ret = 0; - -bail: - return ret; -} - -static int ipath_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - - dev->port_cap_flags |= props->set_port_cap_mask; - dev->port_cap_flags &= ~props->clr_port_cap_mask; - if (port_modify_mask & IB_PORT_SHUTDOWN) - ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); - if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) - dev->qkey_violations = 0; - return 0; -} - -static int ipath_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - int ret; - - if (index >= 1) { - ret = -EINVAL; - goto bail; - } - gid->global.subnet_prefix = dev->gid_prefix; - gid->global.interface_id = dev->dd->ipath_guid; - - ret = 0; - -bail: - return ret; -} - -static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_pd *pd; - struct ib_pd *ret; - - /* - * This is actually totally arbitrary. Some correctness tests - * assume there's a maximum number of PDs that can be allocated. - * We don't actually have this limit, but we fail the test if - * we allow allocations of more than we report for this value. - */ - - pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock(&dev->n_pds_lock); - if (dev->n_pds_allocated == ib_ipath_max_pds) { - spin_unlock(&dev->n_pds_lock); - kfree(pd); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_pds_allocated++; - spin_unlock(&dev->n_pds_lock); - - /* ib_alloc_pd() will initialize pd->ibpd. */ - pd->user = udata != NULL; - - ret = &pd->ibpd; - -bail: - return ret; -} - -static int ipath_dealloc_pd(struct ib_pd *ibpd) -{ - struct ipath_pd *pd = to_ipd(ibpd); - struct ipath_ibdev *dev = to_idev(ibpd->device); - - spin_lock(&dev->n_pds_lock); - dev->n_pds_allocated--; - spin_unlock(&dev->n_pds_lock); - - kfree(pd); - - return 0; -} - -/** - * ipath_create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * - * This may be called from interrupt context. - */ -static struct ib_ah *ipath_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) -{ - struct ipath_ah *ah; - struct ib_ah *ret; - struct ipath_ibdev *dev = to_idev(pd->device); - unsigned long flags; - - /* A multicast address requires a GRH (see ch. 8.4.1). */ - if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && - ah_attr->dlid != IPATH_PERMISSIVE_LID && - !(ah_attr->ah_flags & IB_AH_GRH)) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - if (ah_attr->dlid == 0) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - if (ah_attr->port_num < 1 || - ah_attr->port_num > pd->device->phys_port_cnt) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - kfree(ah); - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - dev->n_ahs_allocated++; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - /* ib_create_ah() will initialize ah->ibah. */ - ah->attr = *ah_attr; - ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate); - - ret = &ah->ibah; - -bail: - return ret; -} - -/** - * ipath_destroy_ah - destroy an address handle - * @ibah: the AH to destroy - * - * This may be called from interrupt context. - */ -static int ipath_destroy_ah(struct ib_ah *ibah) -{ - struct ipath_ibdev *dev = to_idev(ibah->device); - struct ipath_ah *ah = to_iah(ibah); - unsigned long flags; - - spin_lock_irqsave(&dev->n_ahs_lock, flags); - dev->n_ahs_allocated--; - spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - - kfree(ah); - - return 0; -} - -static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) -{ - struct ipath_ah *ah = to_iah(ibah); - - *ah_attr = ah->attr; - ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate); - - return 0; -} - -/** - * ipath_get_npkeys - return the size of the PKEY table for port 0 - * @dd: the infinipath device - */ -unsigned ipath_get_npkeys(struct ipath_devdata *dd) -{ - return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); -} - -/** - * ipath_get_pkey - return the indexed PKEY from the port PKEY table - * @dd: the infinipath device - * @index: the PKEY index - */ -unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) -{ - unsigned ret; - - /* always a kernel port, no locking needed */ - if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) - ret = 0; - else - ret = dd->ipath_pd[0]->port_pkeys[index]; - - return ret; -} - -static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - struct ipath_ibdev *dev = to_idev(ibdev); - int ret; - - if (index >= ipath_get_npkeys(dev->dd)) { - ret = -EINVAL; - goto bail; - } - - *pkey = ipath_get_pkey(dev->dd, index); - ret = 0; - -bail: - return ret; -} - -/** - * ipath_alloc_ucontext - allocate a ucontest - * @ibdev: the infiniband device - * @udata: not used by the InfiniPath driver - */ - -static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) -{ - struct ipath_ucontext *context; - struct ib_ucontext *ret; - - context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - - ret = &context->ibucontext; - -bail: - return ret; -} - -static int ipath_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(to_iucontext(context)); - return 0; -} - -static int ipath_verbs_register_sysfs(struct ib_device *dev); - -static void __verbs_timer(unsigned long arg) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) arg; - - /* Handle verbs layer timeouts. */ - ipath_ib_timer(dd->verbs_dev); - - mod_timer(&dd->verbs_timer, jiffies + 1); -} - -static int enable_timer(struct ipath_devdata *dd) -{ - /* - * Early chips had a design flaw where the chip and kernel idea - * of the tail register don't always agree, and therefore we won't - * get an interrupt on the next packet received. - * If the board supports per packet receive interrupts, use it. - * Otherwise, the timer function periodically checks for packets - * to cover this case. - * Either way, the timer is needed for verbs layer related - * processing. - */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, - 0x2074076542310ULL); - /* Enable GPIO bit 2 interrupt */ - dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - } - - init_timer(&dd->verbs_timer); - dd->verbs_timer.function = __verbs_timer; - dd->verbs_timer.data = (unsigned long)dd; - dd->verbs_timer.expires = jiffies + 1; - add_timer(&dd->verbs_timer); - - return 0; -} - -static int disable_timer(struct ipath_devdata *dd) -{ - /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - /* Disable GPIO bit 2 interrupt */ - dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - dd->ipath_gpio_mask); - /* - * We might want to undo changes to debugportselect, - * but how? - */ - } - - del_timer_sync(&dd->verbs_timer); - - return 0; -} - -/** - * ipath_register_ib_device - register our device with the infiniband core - * @dd: the device data structure - * Return the allocated ipath_ibdev pointer or NULL on error. - */ -int ipath_register_ib_device(struct ipath_devdata *dd) -{ - struct ipath_verbs_counters cntrs; - struct ipath_ibdev *idev; - struct ib_device *dev; - struct ipath_verbs_txreq *tx; - unsigned i; - int ret; - - idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); - if (idev == NULL) { - ret = -ENOMEM; - goto bail; - } - - dev = &idev->ibdev; - - if (dd->ipath_sdma_descq_cnt) { - tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx, - GFP_KERNEL); - if (tx == NULL) { - ret = -ENOMEM; - goto err_tx; - } - } else - tx = NULL; - idev->txreq_bufs = tx; - - /* Only need to initialize non-zero fields. */ - spin_lock_init(&idev->n_pds_lock); - spin_lock_init(&idev->n_ahs_lock); - spin_lock_init(&idev->n_cqs_lock); - spin_lock_init(&idev->n_qps_lock); - spin_lock_init(&idev->n_srqs_lock); - spin_lock_init(&idev->n_mcast_grps_lock); - - spin_lock_init(&idev->qp_table.lock); - spin_lock_init(&idev->lk_table.lock); - idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); - /* Set the prefix to the default value (see ch. 4.1.1) */ - idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); - - ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); - if (ret) - goto err_qp; - - /* - * The top ib_ipath_lkey_table_size bits are used to index the - * table. The lower 8 bits can be owned by the user (copied from - * the LKEY). The remaining bits act as a generation number or tag. - */ - idev->lk_table.max = 1 << ib_ipath_lkey_table_size; - idev->lk_table.table = kzalloc(idev->lk_table.max * - sizeof(*idev->lk_table.table), - GFP_KERNEL); - if (idev->lk_table.table == NULL) { - ret = -ENOMEM; - goto err_lk; - } - INIT_LIST_HEAD(&idev->pending_mmaps); - spin_lock_init(&idev->pending_lock); - idev->mmap_offset = PAGE_SIZE; - spin_lock_init(&idev->mmap_offset_lock); - INIT_LIST_HEAD(&idev->pending[0]); - INIT_LIST_HEAD(&idev->pending[1]); - INIT_LIST_HEAD(&idev->pending[2]); - INIT_LIST_HEAD(&idev->piowait); - INIT_LIST_HEAD(&idev->rnrwait); - INIT_LIST_HEAD(&idev->txreq_free); - idev->pending_index = 0; - idev->port_cap_flags = - IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; - if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY) - idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; - idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; - idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; - idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; - idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; - idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; - - /* Snapshot current HW counters to "clear" them. */ - ipath_get_counters(dd, &cntrs); - idev->z_symbol_error_counter = cntrs.symbol_error_counter; - idev->z_link_error_recovery_counter = - cntrs.link_error_recovery_counter; - idev->z_link_downed_counter = cntrs.link_downed_counter; - idev->z_port_rcv_errors = cntrs.port_rcv_errors; - idev->z_port_rcv_remphys_errors = - cntrs.port_rcv_remphys_errors; - idev->z_port_xmit_discards = cntrs.port_xmit_discards; - idev->z_port_xmit_data = cntrs.port_xmit_data; - idev->z_port_rcv_data = cntrs.port_rcv_data; - idev->z_port_xmit_packets = cntrs.port_xmit_packets; - idev->z_port_rcv_packets = cntrs.port_rcv_packets; - idev->z_local_link_integrity_errors = - cntrs.local_link_integrity_errors; - idev->z_excessive_buffer_overrun_errors = - cntrs.excessive_buffer_overrun_errors; - idev->z_vl15_dropped = cntrs.vl15_dropped; - - for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++) - list_add(&tx->txreq.list, &idev->txreq_free); - - /* - * The system image GUID is supposed to be the same for all - * IB HCAs in a single system but since there can be other - * device types in the system, we can't be sure this is unique. - */ - if (!sys_image_guid) - sys_image_guid = dd->ipath_guid; - idev->sys_image_guid = sys_image_guid; - idev->ib_unit = dd->ipath_unit; - idev->dd = dd; - - strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); - dev->owner = THIS_MODULE; - dev->node_guid = dd->ipath_guid; - dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; - dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->node_type = RDMA_NODE_IB_CA; - dev->phys_port_cnt = 1; - dev->num_comp_vectors = 1; - dev->dma_device = &dd->pcidev->dev; - dev->query_device = ipath_query_device; - dev->modify_device = ipath_modify_device; - dev->query_port = ipath_query_port; - dev->modify_port = ipath_modify_port; - dev->query_pkey = ipath_query_pkey; - dev->query_gid = ipath_query_gid; - dev->alloc_ucontext = ipath_alloc_ucontext; - dev->dealloc_ucontext = ipath_dealloc_ucontext; - dev->alloc_pd = ipath_alloc_pd; - dev->dealloc_pd = ipath_dealloc_pd; - dev->create_ah = ipath_create_ah; - dev->destroy_ah = ipath_destroy_ah; - dev->query_ah = ipath_query_ah; - dev->create_srq = ipath_create_srq; - dev->modify_srq = ipath_modify_srq; - dev->query_srq = ipath_query_srq; - dev->destroy_srq = ipath_destroy_srq; - dev->create_qp = ipath_create_qp; - dev->modify_qp = ipath_modify_qp; - dev->query_qp = ipath_query_qp; - dev->destroy_qp = ipath_destroy_qp; - dev->post_send = ipath_post_send; - dev->post_recv = ipath_post_receive; - dev->post_srq_recv = ipath_post_srq_receive; - dev->create_cq = ipath_create_cq; - dev->destroy_cq = ipath_destroy_cq; - dev->resize_cq = ipath_resize_cq; - dev->poll_cq = ipath_poll_cq; - dev->req_notify_cq = ipath_req_notify_cq; - dev->get_dma_mr = ipath_get_dma_mr; - dev->reg_phys_mr = ipath_reg_phys_mr; - dev->reg_user_mr = ipath_reg_user_mr; - dev->dereg_mr = ipath_dereg_mr; - dev->alloc_fmr = ipath_alloc_fmr; - dev->map_phys_fmr = ipath_map_phys_fmr; - dev->unmap_fmr = ipath_unmap_fmr; - dev->dealloc_fmr = ipath_dealloc_fmr; - dev->attach_mcast = ipath_multicast_attach; - dev->detach_mcast = ipath_multicast_detach; - dev->process_mad = ipath_process_mad; - dev->mmap = ipath_mmap; - dev->dma_ops = &ipath_dma_mapping_ops; - - snprintf(dev->node_desc, sizeof(dev->node_desc), - IPATH_IDSTR " %s", init_utsname()->nodename); - - ret = ib_register_device(dev, NULL); - if (ret) - goto err_reg; - - ret = ipath_verbs_register_sysfs(dev); - if (ret) - goto err_class; - - enable_timer(dd); - - goto bail; - -err_class: - ib_unregister_device(dev); -err_reg: - kfree(idev->lk_table.table); -err_lk: - kfree(idev->qp_table.table); -err_qp: - kfree(idev->txreq_bufs); -err_tx: - ib_dealloc_device(dev); - ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); - idev = NULL; - -bail: - dd->verbs_dev = idev; - return ret; -} - -void ipath_unregister_ib_device(struct ipath_ibdev *dev) -{ - struct ib_device *ibdev = &dev->ibdev; - u32 qps_inuse; - - ib_unregister_device(ibdev); - - disable_timer(dev->dd); - - if (!list_empty(&dev->pending[0]) || - !list_empty(&dev->pending[1]) || - !list_empty(&dev->pending[2])) - ipath_dev_err(dev->dd, "pending list not empty!\n"); - if (!list_empty(&dev->piowait)) - ipath_dev_err(dev->dd, "piowait list not empty!\n"); - if (!list_empty(&dev->rnrwait)) - ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); - if (!ipath_mcast_tree_empty()) - ipath_dev_err(dev->dd, "multicast table memory leak!\n"); - /* - * Note that ipath_unregister_ib_device() can be called before all - * the QPs are destroyed! - */ - qps_inuse = ipath_free_all_qps(&dev->qp_table); - if (qps_inuse) - ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", - qps_inuse); - kfree(dev->qp_table.table); - kfree(dev->lk_table.table); - kfree(dev->txreq_bufs); - ib_dealloc_device(ibdev); -} - -static ssize_t show_rev(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ipath_ibdev *dev = - container_of(device, struct ipath_ibdev, ibdev.dev); - - return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); -} - -static ssize_t show_hca(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ipath_ibdev *dev = - container_of(device, struct ipath_ibdev, ibdev.dev); - int ret; - - ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); - if (ret < 0) - goto bail; - strcat(buf, "\n"); - ret = strlen(buf); - -bail: - return ret; -} - -static ssize_t show_stats(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ipath_ibdev *dev = - container_of(device, struct ipath_ibdev, ibdev.dev); - int i; - int len; - - len = sprintf(buf, - "RC resends %d\n" - "RC no QACK %d\n" - "RC ACKs %d\n" - "RC SEQ NAKs %d\n" - "RC RDMA seq %d\n" - "RC RNR NAKs %d\n" - "RC OTH NAKs %d\n" - "RC timeouts %d\n" - "RC RDMA dup %d\n" - "piobuf wait %d\n" - "unaligned %d\n" - "PKT drops %d\n" - "WQE errs %d\n", - dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, - dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, - dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, - dev->n_pkt_drops, dev->n_wqe_errs); - for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { - const struct ipath_opcode_stats *si = &dev->opstats[i]; - - if (!si->n_packets && !si->n_bytes) - continue; - len += sprintf(buf + len, "%02x %llu/%llu\n", i, - (unsigned long long) si->n_packets, - (unsigned long long) si->n_bytes); - } - return len; -} - -static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); -static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); -static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL); - -static struct device_attribute *ipath_class_attributes[] = { - &dev_attr_hw_rev, - &dev_attr_hca_type, - &dev_attr_board_id, - &dev_attr_stats -}; - -static int ipath_verbs_register_sysfs(struct ib_device *dev) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) { - ret = device_create_file(&dev->dev, - ipath_class_attributes[i]); - if (ret) - goto bail; - } - return 0; -bail: - for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) - device_remove_file(&dev->dev, ipath_class_attributes[i]); - return ret; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_verbs.h b/kernel/drivers/infiniband/hw/ipath/ipath_verbs.h deleted file mode 100644 index ae6cff4ab..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_verbs.h +++ /dev/null @@ -1,936 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IPATH_VERBS_H -#define IPATH_VERBS_H - -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/kref.h> -#include <rdma/ib_pack.h> -#include <rdma/ib_user_verbs.h> - -#include "ipath_kernel.h" - -#define IPATH_MAX_RDMA_ATOMIC 4 - -#define QPN_MAX (1 << 24) -#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define IPATH_UVERBS_ABI_VERSION 2 - -/* - * Define an ib_cq_notify value that is not valid so we know when CQ - * notifications are armed. - */ -#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - -/* Flags for checking QP state (see ib_ipath_state_ops[]) */ -#define IPATH_POST_SEND_OK 0x01 -#define IPATH_POST_RECV_OK 0x02 -#define IPATH_PROCESS_RECV_OK 0x04 -#define IPATH_PROCESS_SEND_OK 0x08 -#define IPATH_PROCESS_NEXT_SEND_OK 0x10 -#define IPATH_FLUSH_SEND 0x20 -#define IPATH_FLUSH_RECV 0x40 -#define IPATH_PROCESS_OR_FLUSH_SEND \ - (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) - -/* IB Performance Manager status values */ -#define IB_PMA_SAMPLE_STATUS_DONE 0x00 -#define IB_PMA_SAMPLE_STATUS_STARTED 0x01 -#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 - -/* Mandatory IB performance counter select values. */ -#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) -#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) -#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) -#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) -#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) - -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __attribute__ ((packed)); - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __attribute__ ((packed)); - -struct ipath_other_headers { - __be32 bth[3]; - union { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - struct ib_atomic_eth atomic_eth; - } u; -} __attribute__ ((packed)); - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct ipath_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct ipath_other_headers oth; - } l; - struct ipath_other_headers oth; - } u; -} __attribute__ ((packed)); - -struct ipath_pio_header { - __le32 pbc[2]; - struct ipath_ib_header hdr; -} __attribute__ ((packed)); - -/* - * There is one struct ipath_mcast for each multicast GID. - * All attached QPs are then stored as a list of - * struct ipath_mcast_qp. - */ -struct ipath_mcast_qp { - struct list_head list; - struct ipath_qp *qp; -}; - -struct ipath_mcast { - struct rb_node rb_node; - union ib_gid mgid; - struct list_head qp_list; - wait_queue_head_t wait; - atomic_t refcount; - int n_attached; -}; - -/* Protection domain */ -struct ipath_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - -/* Address Handle */ -struct ipath_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; -}; - -/* - * This structure is used by ipath_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct ipath_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and completion queue entries as a single memory allocation so - * it can be mmap'ed into user space. - */ -struct ipath_cq_wc { - u32 head; /* index of next entry to fill */ - u32 tail; /* index of next ib_poll_cq() entry */ - union { - /* these are actually size ibcq.cqe + 1 */ - struct ib_uverbs_wc uqueue[0]; - struct ib_wc kqueue[0]; - }; -}; - -/* - * The completion queue structure. - */ -struct ipath_cq { - struct ib_cq ibcq; - struct tasklet_struct comptask; - spinlock_t lock; - u8 notify; - u8 triggered; - struct ipath_cq_wc *queue; - struct ipath_mmap_info *ip; -}; - -/* - * A segment is a linear region of low physical memory. - * XXX Maybe we should use phys addr here and kmap()/kunmap(). - * Used by the verbs layer. - */ -struct ipath_seg { - void *vaddr; - size_t length; -}; - -/* The number of ipath_segs that fit in a page. */ -#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) - -struct ipath_segarray { - struct ipath_seg segs[IPATH_SEGSZ]; -}; - -struct ipath_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of ipath_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - struct ipath_segarray *map[0]; /* the segments */ -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct ipath_sge { - struct ipath_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* Memory region */ -struct ipath_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct ipath_mregion mr; /* must be last */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. - */ -struct ipath_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct ipath_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). - */ -struct ipath_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct ipath_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct ipath_rwqe wq[0]; -}; - -struct ipath_rq { - struct ipath_rwq *wq; - spinlock_t lock; - u32 size; /* size of RWQE array */ - u8 max_sge; -}; - -struct ipath_srq { - struct ib_srq ibsrq; - struct ipath_rq rq; - struct ipath_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - -struct ipath_sge_state { - struct ipath_sge *sg_list; /* next SGE to be used if any */ - struct ipath_sge sge; /* progress state for the current SGE */ - u8 num_sge; - u8 static_rate; -}; - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct ipath_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - union { - struct ipath_sge_state rdma_sge; - u64 atomic_data; - }; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. - */ -struct ipath_qp { - struct ib_qp ibqp; - struct ipath_qp *next; /* link list for QPN hash table */ - struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ - struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ - struct list_head piowait; /* link for wait PIO buf */ - struct list_head timerwait; /* link for waiting for timeouts */ - struct ib_ah_attr remote_ah_attr; - struct ipath_ib_header s_hdr; /* next packet header to send */ - atomic_t refcount; - wait_queue_head_t wait; - wait_queue_head_t wait_dma; - struct tasklet_struct s_task; - struct ipath_mmap_info *ip; - struct ipath_sge_state *s_cur_sge; - struct ipath_verbs_txreq *s_tx; - struct ipath_sge_state s_sge; /* current send request data */ - struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; - struct ipath_sge_state s_ack_rdma_sge; - struct ipath_sge_state s_rdma_read_sge; - struct ipath_sge_state r_sge; /* current receive data */ - spinlock_t s_lock; - atomic_t s_dma_busy; - u16 s_pkt_delay; - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u64 r_wr_id; /* ID for current receive WQE */ - unsigned long r_aflags; - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - u8 state; /* QP state */ - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_state; /* opcode of last packet received */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_flags; - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - u8 qp_access_flags; - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_pkey_index; /* PKEY index to use */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - u8 s_flags; - u8 s_dmult; - u8 s_draining; - u8 timeout; /* Timeout for this QP */ - enum ib_mtu path_mtu; - u32 remote_qpn; - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_last; /* last un-ACK'ed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - struct ipath_swqe *s_wq; /* send work queue */ - struct ipath_swqe *s_wqe; - struct ipath_sge *r_ud_sg_list; - struct ipath_rq r_rq; /* receive work queue */ - struct ipath_sge r_sg_list[0]; /* verified SGEs */ -}; - -/* - * Atomic bit definitions for r_aflags. - */ -#define IPATH_R_WRID_VALID 0 - -/* - * Bit definitions for r_flags. - */ -#define IPATH_R_REUSE_SGE 0x01 -#define IPATH_R_RDMAR_SEQ 0x02 - -/* - * Bit definitions for s_flags. - * - * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs - * before processing the next SWQE - * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs - * before processing the next SWQE - * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. - * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE - * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating - * next send completion entry not via send DMA. - */ -#define IPATH_S_SIGNAL_REQ_WR 0x01 -#define IPATH_S_FENCE_PENDING 0x02 -#define IPATH_S_RDMAR_PENDING 0x04 -#define IPATH_S_ACK_PENDING 0x08 -#define IPATH_S_BUSY 0x10 -#define IPATH_S_WAITING 0x20 -#define IPATH_S_WAIT_SSN_CREDIT 0x40 -#define IPATH_S_WAIT_DMA 0x80 - -#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ - IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) - -#define IPATH_PSN_CREDIT 512 - -/* - * Since struct ipath_swqe is not a fixed size, we can't simply index into - * struct ipath_qp.s_wq. This function does the array index computation. - */ -static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, - unsigned n) -{ - return (struct ipath_swqe *)((char *)qp->s_wq + - (sizeof(struct ipath_swqe) + - qp->s_max_sge * - sizeof(struct ipath_sge)) * n); -} - -/* - * Since struct ipath_rwqe is not a fixed size, we can't simply index into - * struct ipath_rwq.wq. This function does the array index computation. - */ -static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, - unsigned n) -{ - return (struct ipath_rwqe *) - ((char *) rq->wq->wq + - (sizeof(struct ipath_rwqe) + - rq->max_sge * sizeof(struct ib_sge)) * n); -} - -/* - * QPN-map pages start out as NULL, they get allocated upon - * first use and are never deallocated. This way, - * large bitmaps are not allocated unless large numbers of QPs are used. - */ -struct qpn_map { - atomic_t n_free; - void *page; -}; - -struct ipath_qp_table { - spinlock_t lock; - u32 last; /* last QP number allocated */ - u32 max; /* size of the hash table */ - u32 nmaps; /* size of the map table */ - struct ipath_qp **table; - /* bit map of free numbers */ - struct qpn_map map[QPNMAP_ENTRIES]; -}; - -struct ipath_lkey_table { - spinlock_t lock; - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct ipath_mregion **table; -}; - -struct ipath_opcode_stats { - u64 n_packets; /* number of packets */ - u64 n_bytes; /* total number of bytes */ -}; - -struct ipath_ibdev { - struct ib_device ibdev; - struct ipath_devdata *dd; - struct list_head pending_mmaps; - spinlock_t mmap_offset_lock; - u32 mmap_offset; - int ib_unit; /* This is the device number */ - u16 sm_lid; /* in host order */ - u8 sm_sl; - u8 mkeyprot; - /* non-zero when timer is set */ - unsigned long mkey_lease_timeout; - - /* The following fields are really per port. */ - struct ipath_qp_table qp_table; - struct ipath_lkey_table lk_table; - struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */ - struct list_head piowait; /* list for wait PIO buf */ - struct list_head txreq_free; - void *txreq_bufs; - /* list of QPs waiting for RNR timer */ - struct list_head rnrwait; - spinlock_t pending_lock; - __be64 sys_image_guid; /* in network order */ - __be64 gid_prefix; /* in network order */ - __be64 mkey; - - u32 n_pds_allocated; /* number of PDs allocated for device */ - spinlock_t n_pds_lock; - u32 n_ahs_allocated; /* number of AHs allocated for device */ - spinlock_t n_ahs_lock; - u32 n_cqs_allocated; /* number of CQs allocated for device */ - spinlock_t n_cqs_lock; - u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; - u32 n_srqs_allocated; /* number of SRQs allocated for device */ - spinlock_t n_srqs_lock; - u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ - spinlock_t n_mcast_grps_lock; - - u64 ipath_sword; /* total dwords sent (sample result) */ - u64 ipath_rword; /* total dwords received (sample result) */ - u64 ipath_spkts; /* total packets sent (sample result) */ - u64 ipath_rpkts; /* total packets received (sample result) */ - /* # of ticks no data sent (sample result) */ - u64 ipath_xmit_wait; - u64 rcv_errors; /* # of packets with SW detected rcv errs */ - u64 n_unicast_xmit; /* total unicast packets sent */ - u64 n_unicast_rcv; /* total unicast packets received */ - u64 n_multicast_xmit; /* total multicast packets sent */ - u64 n_multicast_rcv; /* total multicast packets received */ - u64 z_symbol_error_counter; /* starting count for PMA */ - u64 z_link_error_recovery_counter; /* starting count for PMA */ - u64 z_link_downed_counter; /* starting count for PMA */ - u64 z_port_rcv_errors; /* starting count for PMA */ - u64 z_port_rcv_remphys_errors; /* starting count for PMA */ - u64 z_port_xmit_discards; /* starting count for PMA */ - u64 z_port_xmit_data; /* starting count for PMA */ - u64 z_port_rcv_data; /* starting count for PMA */ - u64 z_port_xmit_packets; /* starting count for PMA */ - u64 z_port_rcv_packets; /* starting count for PMA */ - u32 z_pkey_violations; /* starting count for PMA */ - u32 z_local_link_integrity_errors; /* starting count for PMA */ - u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ - u32 z_vl15_dropped; /* starting count for PMA */ - u32 n_rc_resends; - u32 n_rc_acks; - u32 n_rc_qacks; - u32 n_seq_naks; - u32 n_rdma_seq; - u32 n_rnr_naks; - u32 n_other_naks; - u32 n_timeouts; - u32 n_pkt_drops; - u32 n_vl15_dropped; - u32 n_wqe_errs; - u32 n_rdma_dup_busy; - u32 n_piowait; - u32 n_unaligned; - u32 port_cap_flags; - u32 pma_sample_start; - u32 pma_sample_interval; - __be16 pma_counter_select[5]; - u16 pma_tag; - u16 qkey_violations; - u16 mkey_violations; - u16 mkey_lease_period; - u16 pending_index; /* which pending queue is active */ - u8 pma_sample_status; - u8 subnet_timeout; - u8 vl_high_limit; - struct ipath_opcode_stats opstats[128]; -}; - -struct ipath_verbs_counters { - u64 symbol_error_counter; - u64 link_error_recovery_counter; - u64 link_downed_counter; - u64 port_rcv_errors; - u64 port_rcv_remphys_errors; - u64 port_xmit_discards; - u64 port_xmit_data; - u64 port_rcv_data; - u64 port_xmit_packets; - u64 port_rcv_packets; - u32 local_link_integrity_errors; - u32 excessive_buffer_overrun_errors; - u32 vl15_dropped; -}; - -struct ipath_verbs_txreq { - struct ipath_qp *qp; - struct ipath_swqe *wqe; - u32 map_len; - u32 len; - struct ipath_sge_state *ss; - struct ipath_pio_header hdr; - struct ipath_sdma_txreq txreq; -}; - -static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct ipath_mr, ibmr); -} - -static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct ipath_pd, ibpd); -} - -static inline struct ipath_ah *to_iah(struct ib_ah *ibah) -{ - return container_of(ibah, struct ipath_ah, ibah); -} - -static inline struct ipath_cq *to_icq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct ipath_cq, ibcq); -} - -static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct ipath_srq, ibsrq); -} - -static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct ipath_qp, ibqp); -} - -static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct ipath_ibdev, ibdev); -} - -/* - * This must be called with s_lock held. - */ -static inline void ipath_schedule_send(struct ipath_qp *qp) -{ - if (qp->s_flags & IPATH_S_ANY_WAIT) - qp->s_flags &= ~IPATH_S_ANY_WAIT; - if (!(qp->s_flags & IPATH_S_BUSY)) - tasklet_hi_schedule(&qp->s_task); -} - -int ipath_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); - -/* - * Compare the lower 24 bits of the two values. - * Returns an integer <, ==, or > than zero. - */ -static inline int ipath_cmp24(u32 a, u32 b) -{ - return (((int) a) - ((int) b)) << 8; -} - -struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); - -int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait); - -int ipath_get_counters(struct ipath_devdata *dd, - struct ipath_verbs_counters *cntrs); - -int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); - -int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); - -int ipath_mcast_tree_empty(void); - -__be32 ipath_compute_aeth(struct ipath_qp *qp); - -struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn); - -struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -int ipath_destroy_qp(struct ib_qp *ibqp); - -int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err); - -int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata); - -int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr); - -unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); - -int ipath_init_qp_table(struct ipath_ibdev *idev, int size); - -void ipath_get_credit(struct ipath_qp *qp, u32 aeth); - -unsigned ipath_ib_rate_to_mult(enum ib_rate rate); - -int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, - u32 hdrwords, struct ipath_sge_state *ss, u32 len); - -void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); - -void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); - -void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp); - -void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp); - -void ipath_restart_rc(struct ipath_qp *qp, u32 psn); - -void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); - -int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); - -void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct ipath_qp *qp); - -int ipath_alloc_lkey(struct ipath_lkey_table *rkt, - struct ipath_mregion *mr); - -void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); - -int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, - struct ib_sge *sge, int acc); - -int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, - u32 len, u64 vaddr, u32 rkey, int acc); - -int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); - -struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); - -int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, - struct ib_udata *udata); - -int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); - -int ipath_destroy_srq(struct ib_srq *ibsrq); - -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); - -int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); - -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ipath_destroy_cq(struct ib_cq *ibcq); - -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); - -int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); - -struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc); - -struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start); - -struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata); - -int ipath_dereg_mr(struct ib_mr *ibmr); - -struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, - int list_len, u64 iova); - -int ipath_unmap_fmr(struct list_head *fmr_list); - -int ipath_dealloc_fmr(struct ib_fmr *ibfmr); - -void ipath_release_mmap_info(struct kref *ref); - -struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, - u32 size, - struct ib_ucontext *context, - void *obj); - -void ipath_update_mmap_info(struct ipath_ibdev *dev, - struct ipath_mmap_info *ip, - u32 size, void *obj); - -int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - -void ipath_insert_rnr_queue(struct ipath_qp *qp); - -int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, - u32 *lengthp, struct ipath_sge_state *ss); - -int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); - -u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords); - -void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 bth0, u32 bth2); - -void ipath_do_send(unsigned long data); - -void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, - enum ib_wc_status status); - -int ipath_make_rc_req(struct ipath_qp *qp); - -int ipath_make_uc_req(struct ipath_qp *qp); - -int ipath_make_ud_req(struct ipath_qp *qp); - -int ipath_register_ib_device(struct ipath_devdata *); - -void ipath_unregister_ib_device(struct ipath_ibdev *); - -void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); - -int ipath_ib_piobufavail(struct ipath_ibdev *); - -unsigned ipath_get_npkeys(struct ipath_devdata *); - -u32 ipath_get_cr_errpkey(struct ipath_devdata *); - -unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); - -extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; - -/* - * Below converts HCA-specific LinkTrainingState to IB PhysPortState - * values. - */ -extern const u8 ipath_cvt_physportstate[]; -#define IB_PHYSPORTSTATE_SLEEP 1 -#define IB_PHYSPORTSTATE_POLL 2 -#define IB_PHYSPORTSTATE_DISABLED 3 -#define IB_PHYSPORTSTATE_CFG_TRAIN 4 -#define IB_PHYSPORTSTATE_LINKUP 5 -#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 - -extern const int ib_ipath_state_ops[]; - -extern unsigned int ib_ipath_lkey_table_size; - -extern unsigned int ib_ipath_max_cqes; - -extern unsigned int ib_ipath_max_cqs; - -extern unsigned int ib_ipath_max_qp_wrs; - -extern unsigned int ib_ipath_max_qps; - -extern unsigned int ib_ipath_max_sges; - -extern unsigned int ib_ipath_max_mcast_grps; - -extern unsigned int ib_ipath_max_mcast_qp_attached; - -extern unsigned int ib_ipath_max_srqs; - -extern unsigned int ib_ipath_max_srq_sges; - -extern unsigned int ib_ipath_max_srq_wrs; - -extern const u32 ib_ipath_rnr_table[]; - -extern struct ib_dma_mapping_ops ipath_dma_mapping_ops; - -#endif /* IPATH_VERBS_H */ diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/kernel/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c deleted file mode 100644 index 6216ea923..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/rculist.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include "ipath_verbs.h" - -/* - * Global table of GID to attached QPs. - * The table is global to all ipath devices since a send from one QP/device - * needs to be locally routed to any locally attached QPs on the same - * or different device. - */ -static struct rb_root mcast_tree; -static DEFINE_SPINLOCK(mcast_lock); - -/** - * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct - * @qp: the QP to link - */ -static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp) -{ - struct ipath_mcast_qp *mqp; - - mqp = kmalloc(sizeof *mqp, GFP_KERNEL); - if (!mqp) - goto bail; - - mqp->qp = qp; - atomic_inc(&qp->refcount); - -bail: - return mqp; -} - -static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp) -{ - struct ipath_qp *qp = mqp->qp; - - /* Notify ipath_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - - kfree(mqp); -} - -/** - * ipath_mcast_alloc - allocate the multicast GID structure - * @mgid: the multicast GID - * - * A list of QPs will be attached to this structure. - */ -static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid) -{ - struct ipath_mcast *mcast; - - mcast = kmalloc(sizeof *mcast, GFP_KERNEL); - if (!mcast) - goto bail; - - mcast->mgid = *mgid; - INIT_LIST_HEAD(&mcast->qp_list); - init_waitqueue_head(&mcast->wait); - atomic_set(&mcast->refcount, 0); - mcast->n_attached = 0; - -bail: - return mcast; -} - -static void ipath_mcast_free(struct ipath_mcast *mcast) -{ - struct ipath_mcast_qp *p, *tmp; - - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) - ipath_mcast_qp_free(p); - - kfree(mcast); -} - -/** - * ipath_mcast_find - search the global table for the given multicast GID - * @mgid: the multicast GID to search for - * - * Returns NULL if not found. - * - * The caller is responsible for decrementing the reference count if found. - */ -struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid) -{ - struct rb_node *n; - unsigned long flags; - struct ipath_mcast *mcast; - - spin_lock_irqsave(&mcast_lock, flags); - n = mcast_tree.rb_node; - while (n) { - int ret; - - mcast = rb_entry(n, struct ipath_mcast, rb_node); - - ret = memcmp(mgid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else { - atomic_inc(&mcast->refcount); - spin_unlock_irqrestore(&mcast_lock, flags); - goto bail; - } - } - spin_unlock_irqrestore(&mcast_lock, flags); - - mcast = NULL; - -bail: - return mcast; -} - -/** - * ipath_mcast_add - insert mcast GID into table and attach QP struct - * @mcast: the mcast GID table - * @mqp: the QP to attach - * - * Return zero if both were added. Return EEXIST if the GID was already in - * the table but the QP was added. Return ESRCH if the QP was already - * attached and neither structure was added. - */ -static int ipath_mcast_add(struct ipath_ibdev *dev, - struct ipath_mcast *mcast, - struct ipath_mcast_qp *mqp) -{ - struct rb_node **n = &mcast_tree.rb_node; - struct rb_node *pn = NULL; - int ret; - - spin_lock_irq(&mcast_lock); - - while (*n) { - struct ipath_mcast *tmcast; - struct ipath_mcast_qp *p; - - pn = *n; - tmcast = rb_entry(pn, struct ipath_mcast, rb_node); - - ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) { - n = &pn->rb_left; - continue; - } - if (ret > 0) { - n = &pn->rb_right; - continue; - } - - /* Search the QP list to see if this is already there. */ - list_for_each_entry_rcu(p, &tmcast->qp_list, list) { - if (p->qp == mqp->qp) { - ret = ESRCH; - goto bail; - } - } - if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) { - ret = ENOMEM; - goto bail; - } - - tmcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &tmcast->qp_list); - ret = EEXIST; - goto bail; - } - - spin_lock(&dev->n_mcast_grps_lock); - if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { - spin_unlock(&dev->n_mcast_grps_lock); - ret = ENOMEM; - goto bail; - } - - dev->n_mcast_grps_allocated++; - spin_unlock(&dev->n_mcast_grps_lock); - - mcast->n_attached++; - - list_add_tail_rcu(&mqp->list, &mcast->qp_list); - - atomic_inc(&mcast->refcount); - rb_link_node(&mcast->rb_node, pn, n); - rb_insert_color(&mcast->rb_node, &mcast_tree); - - ret = 0; - -bail: - spin_unlock_irq(&mcast_lock); - - return ret; -} - -int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_ibdev *dev = to_idev(ibqp->device); - struct ipath_mcast *mcast; - struct ipath_mcast_qp *mqp; - int ret; - - /* - * Allocate data structures since its better to do this outside of - * spin locks and it will most likely be needed. - */ - mcast = ipath_mcast_alloc(gid); - if (mcast == NULL) { - ret = -ENOMEM; - goto bail; - } - mqp = ipath_mcast_qp_alloc(qp); - if (mqp == NULL) { - ipath_mcast_free(mcast); - ret = -ENOMEM; - goto bail; - } - switch (ipath_mcast_add(dev, mcast, mqp)) { - case ESRCH: - /* Neither was used: can't attach the same QP twice. */ - ipath_mcast_qp_free(mqp); - ipath_mcast_free(mcast); - ret = -EINVAL; - goto bail; - case EEXIST: /* The mcast wasn't used */ - ipath_mcast_free(mcast); - break; - case ENOMEM: - /* Exceeded the maximum number of mcast groups. */ - ipath_mcast_qp_free(mqp); - ipath_mcast_free(mcast); - ret = -ENOMEM; - goto bail; - default: - break; - } - - ret = 0; - -bail: - return ret; -} - -int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ipath_qp *qp = to_iqp(ibqp); - struct ipath_ibdev *dev = to_idev(ibqp->device); - struct ipath_mcast *mcast = NULL; - struct ipath_mcast_qp *p, *tmp; - struct rb_node *n; - int last = 0; - int ret; - - spin_lock_irq(&mcast_lock); - - /* Find the GID in the mcast table. */ - n = mcast_tree.rb_node; - while (1) { - if (n == NULL) { - spin_unlock_irq(&mcast_lock); - ret = -EINVAL; - goto bail; - } - - mcast = rb_entry(n, struct ipath_mcast, rb_node); - ret = memcmp(gid->raw, mcast->mgid.raw, - sizeof(union ib_gid)); - if (ret < 0) - n = n->rb_left; - else if (ret > 0) - n = n->rb_right; - else - break; - } - - /* Search the QP list. */ - list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { - if (p->qp != qp) - continue; - /* - * We found it, so remove it, but don't poison the forward - * link until we are sure there are no list walkers. - */ - list_del_rcu(&p->list); - mcast->n_attached--; - - /* If this was the last attached QP, remove the GID too. */ - if (list_empty(&mcast->qp_list)) { - rb_erase(&mcast->rb_node, &mcast_tree); - last = 1; - } - break; - } - - spin_unlock_irq(&mcast_lock); - - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - ipath_mcast_qp_free(p); - } - if (last) { - atomic_dec(&mcast->refcount); - wait_event(mcast->wait, !atomic_read(&mcast->refcount)); - ipath_mcast_free(mcast); - spin_lock_irq(&dev->n_mcast_grps_lock); - dev->n_mcast_grps_allocated--; - spin_unlock_irq(&dev->n_mcast_grps_lock); - } - - ret = 0; - -bail: - return ret; -} - -int ipath_mcast_tree_empty(void) -{ - return mcast_tree.rb_node == NULL; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/kernel/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c deleted file mode 100644 index 1a7e20a75..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file is conditionally built on PowerPC only. Otherwise weak symbol - * versions of the functions exported from here are used. - */ - -#include "ipath_kernel.h" - -/** - * ipath_enable_wc - enable write combining for MMIO writes to the device - * @dd: infinipath device - * - * Nothing to do on PowerPC, so just return without error. - */ -int ipath_enable_wc(struct ipath_devdata *dd) -{ - return 0; -} diff --git a/kernel/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/kernel/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c deleted file mode 100644 index 4ad0b932d..000000000 --- a/kernel/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file is conditionally built on x86_64 only. Otherwise weak symbol - * versions of the functions exported from here are used. - */ - -#include <linux/pci.h> -#include <asm/mtrr.h> -#include <asm/processor.h> - -#include "ipath_kernel.h" - -/** - * ipath_enable_wc - enable write combining for MMIO writes to the device - * @dd: infinipath device - * - * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable - * write combining. - */ -int ipath_enable_wc(struct ipath_devdata *dd) -{ - int ret = 0; - u64 pioaddr, piolen; - unsigned bits; - const unsigned long addr = pci_resource_start(dd->pcidev, 0); - const size_t len = pci_resource_len(dd->pcidev, 0); - - /* - * Set the PIO buffers to be WCCOMB, so we get HT bursts to the - * chip. Linux (possibly the hardware) requires it to be on a power - * of 2 address matching the length (which has to be a power of 2). - * For rev1, that means the base address, for rev2, it will be just - * the PIO buffers themselves. - * For chips with two sets of buffers, the calculations are - * somewhat more complicated; we need to sum, and the piobufbase - * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. - * The buffers are still packed, so a single range covers both. - */ - if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */ - unsigned long pio2kbase, pio4kbase; - pio2kbase = dd->ipath_piobufbase & 0xffffffffUL; - pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL; - if (pio2kbase < pio4kbase) { /* all, for now */ - pioaddr = addr + pio2kbase; - piolen = pio4kbase - pio2kbase + - dd->ipath_piobcnt4k * dd->ipath_4kalign; - } else { - pioaddr = addr + pio4kbase; - piolen = pio2kbase - pio4kbase + - dd->ipath_piobcnt2k * dd->ipath_palign; - } - } else { /* single buffer size (2K, currently) */ - pioaddr = addr + dd->ipath_piobufbase; - piolen = dd->ipath_piobcnt2k * dd->ipath_palign + - dd->ipath_piobcnt4k * dd->ipath_4kalign; - } - - for (bits = 0; !(piolen & (1ULL << bits)); bits++) - /* do nothing */ ; - - if (piolen != (1ULL << bits)) { - piolen >>= bits; - while (piolen >>= 1) - bits++; - piolen = 1ULL << (bits + 1); - } - if (pioaddr & (piolen - 1)) { - u64 atmp; - ipath_dbg("pioaddr %llx not on right boundary for size " - "%llx, fixing\n", - (unsigned long long) pioaddr, - (unsigned long long) piolen); - atmp = pioaddr & ~(piolen - 1); - if (atmp < addr || (atmp + piolen) > (addr + len)) { - ipath_dev_err(dd, "No way to align address/size " - "(%llx/%llx), no WC mtrr\n", - (unsigned long long) atmp, - (unsigned long long) piolen << 1); - ret = -ENODEV; - } else { - ipath_dbg("changing WC base from %llx to %llx, " - "len from %llx to %llx\n", - (unsigned long long) pioaddr, - (unsigned long long) atmp, - (unsigned long long) piolen, - (unsigned long long) piolen << 1); - pioaddr = atmp; - piolen <<= 1; - } - } - - if (!ret) { - int cookie; - ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC " - "(addr %llx, len=0x%llx)\n", - (unsigned long long) pioaddr, - (unsigned long long) piolen); - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); - if (cookie < 0) { - { - dev_info(&dd->pcidev->dev, - "mtrr_add() WC for PIO bufs " - "failed (%d)\n", - cookie); - ret = -EINVAL; - } - } else { - ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " - "cookie is %d\n", cookie); - dd->ipath_wc_cookie = cookie; - dd->ipath_wc_base = (unsigned long) pioaddr; - dd->ipath_wc_len = (unsigned long) piolen; - } - } - - return ret; -} - -/** - * ipath_disable_wc - disable write combining for MMIO writes to the device - * @dd: infinipath device - */ -void ipath_disable_wc(struct ipath_devdata *dd) -{ - if (dd->ipath_wc_cookie) { - int r; - ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); - r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, - dd->ipath_wc_len); - if (r < 0) - dev_info(&dd->pcidev->dev, - "mtrr_del(%lx, %lx, %lx) failed: %d\n", - dd->ipath_wc_cookie, dd->ipath_wc_base, - dd->ipath_wc_len, r); - dd->ipath_wc_cookie = 0; /* even on failure */ - } -} diff --git a/kernel/drivers/infiniband/hw/mlx4/ah.c b/kernel/drivers/infiniband/hw/mlx4/ah.c index 33fdd5012..86af71351 100644 --- a/kernel/drivers/infiniband/hw/mlx4/ah.c +++ b/kernel/drivers/infiniband/hw/mlx4/ah.c @@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; - u16 vlan_tag; + u16 vlan_tag = 0xffff; + union ib_gid sgid; + struct ib_gid_attr gid_attr; + int ret; memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); if (rdma_is_multicast_addr(&in6)) { @@ -85,11 +88,21 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr } else { memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); } - vlan_tag = ah_attr->vlan_id; + ret = ib_get_cached_gid(pd->device, ah_attr->port_num, + ah_attr->grh.sgid_index, &sgid, &gid_attr); + if (ret) + return ERR_PTR(ret); + memset(ah->av.eth.s_mac, 0, ETH_ALEN); + if (gid_attr.ndev) { + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); + } if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); ah->av.eth.vlan = cpu_to_be16(vlan_tag); if (ah_attr->static_rate) { ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; diff --git a/kernel/drivers/infiniband/hw/mlx4/alias_GUID.c b/kernel/drivers/infiniband/hw/mlx4/alias_GUID.c index 0f00204d2..21cb41a60 100644 --- a/kernel/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/kernel/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -189,7 +189,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, { int i; u64 guid_indexes; - int slave_id; + int slave_id, slave_port; enum slave_port_state new_state; enum slave_port_state prev_state; __be64 tmp_cur_ag, form_cache_ag; @@ -217,6 +217,11 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; if (slave_id >= dev->dev->persist->num_vfs + 1) return; + + slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num); + if (slave_port < 0) /* this port isn't available for the VF */ + continue; + tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, (NUM_ALIAS_GUID_IN_REC * block_num) + i); diff --git a/kernel/drivers/infiniband/hw/mlx4/cq.c b/kernel/drivers/infiniband/hw/mlx4/cq.c index 2857ed897..b88fc8f5a 100644 --- a/kernel/drivers/infiniband/hw/mlx4/cq.c +++ b/kernel/drivers/infiniband/hw/mlx4/cq.c @@ -166,10 +166,14 @@ err_buf: return err; } -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_cq *cq; struct mlx4_uar *uar; @@ -178,6 +182,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector if (entries < 1 || entries > dev->dev->caps.max_cqes) return ERR_PTR(-EINVAL); + if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + return ERR_PTR(-EINVAL); + cq = kmalloc(sizeof *cq, GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); @@ -188,6 +195,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; INIT_LIST_HEAD(&cq->send_qp_list); INIT_LIST_HEAD(&cq->recv_qp_list); @@ -231,7 +239,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector vector = dev->eq_table[vector % ibdev->num_comp_vectors]; err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0, 0); + cq->db.dma, &cq->mcq, vector, 0, + !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); if (err) goto err_dbmap; @@ -809,7 +818,7 @@ repoll: wc->opcode = IB_WC_LSO; break; case MLX4_OPCODE_FMR: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; case MLX4_OPCODE_LOCAL_INVAL: wc->opcode = IB_WC_LOCAL_INV; @@ -862,7 +871,7 @@ repoll: if (is_eth) { wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; if (be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_VLAN_PRESENT_MASK) { + MLX4_CQE_CVLAN_PRESENT_MASK) { wc->vlan_id = be16_to_cpu(cqe->sl_vid) & MLX4_CQE_VID_MASK; } else { diff --git a/kernel/drivers/infiniband/hw/mlx4/mad.c b/kernel/drivers/infiniband/hw/mlx4/mad.c index 9cd2b002d..870e56b6b 100644 --- a/kernel/drivers/infiniband/hw/mlx4/mad.c +++ b/kernel/drivers/infiniband/hw/mlx4/mad.c @@ -64,14 +64,6 @@ enum { #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) -/* Counters should be saturate once they reach their maximum value */ -#define ASSIGN_32BIT_COUNTER(counter, value) do {\ - if ((value) > U32_MAX) \ - counter = cpu_to_be32(U32_MAX); \ - else \ - counter = cpu_to_be32(value); \ -} while (0) - struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -111,8 +103,9 @@ __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) } int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + int port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; void *inbox; @@ -220,7 +213,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can * synthesize LID change, Client-Rereg, GID change, and P_Key change events. */ -static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, +static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad, u16 prev_lid) { struct ib_port_info *pinfo; @@ -356,7 +349,7 @@ static void node_desc_override(struct ib_device *dev, } } -static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad) +static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; @@ -366,7 +359,8 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; /* @@ -463,7 +457,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, struct ib_grh *grh, struct ib_mad *mad) { struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_ud_wr wr; + struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; @@ -586,20 +581,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map; list.length = sizeof (struct mlx4_rcv_tunnel_mad); - list.lkey = tun_ctx->mr->lkey; - - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - wr.wr.ud.remote_qpn = dqpn; - wr.next = NULL; - wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(src_qp, &wr, &bad_wr); + list.lkey = tun_ctx->pd->local_dma_lkey; + + wr.ah = ah; + wr.port_num = port; + wr.remote_qkey = IB_QP_SET_QKEY; + wr.remote_qpn = dqpn; + wr.wr.next = NULL; + wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); + wr.wr.sg_list = &list; + wr.wr.num_sge = 1; + wr.wr.opcode = IB_WR_SEND; + wr.wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(src_qp, &wr.wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); @@ -722,8 +717,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid, prev_lid = 0; int err; @@ -825,34 +820,39 @@ static void edit_counter(struct mlx4_counter *cnt, } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct mlx4_cmd_mailbox *mailbox; + struct mlx4_counter counter_stats; struct mlx4_ib_dev *dev = to_mdev(ibdev); - int err; - u32 inmod = dev->counters[port_num - 1] & 0xffff; - u8 mode; + struct counter_index *tmp_counter; + int err = IB_MAD_RESULT_FAILURE, stats_avail = 0; if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; - mailbox = mlx4_alloc_cmd_mailbox(dev->dev); - if (IS_ERR(mailbox)) - return IB_MAD_RESULT_FAILURE; - - err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, - MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, - MLX4_CMD_WRAPPED); - if (err) - err = IB_MAD_RESULT_FAILURE; - else { + memset(&counter_stats, 0, sizeof(counter_stats)); + mutex_lock(&dev->counters_table[port_num - 1].mutex); + list_for_each_entry(tmp_counter, + &dev->counters_table[port_num - 1].counters_list, + list) { + err = mlx4_get_counter_stats(dev->dev, + tmp_counter->index, + &counter_stats, 0); + if (err) { + err = IB_MAD_RESULT_FAILURE; + stats_avail = 0; + break; + } + stats_avail = 1; + } + mutex_unlock(&dev->counters_table[port_num - 1].mutex); + if (stats_avail) { memset(out_mad->data, 0, sizeof out_mad->data); - mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode; - switch (mode & 0xf) { + switch (counter_stats.counter_mode & 0xf) { case 0: - edit_counter(mailbox->buf, - (void *)(out_mad->data + 40)); + edit_counter(&counter_stats, + (void *)(out_mad->data + 40)); err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; break; default: @@ -860,25 +860,43 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - mlx4_free_cmd_mailbox(dev->dev, mailbox); - return err; } int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { - switch (rdma_port_get_link_layer(ibdev, port_num)) { - case IB_LINK_LAYER_INFINIBAND: + struct mlx4_ib_dev *dev = to_mdev(ibdev); + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA + * queries, should be called only by VFs and for that specific purpose + */ + if (link == IB_LINK_LAYER_INFINIBAND) { + if (mlx4_is_slave(dev->dev) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, in_mad, out_mad); - case IB_LINK_LAYER_ETHERNET: - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - default: - return -EINVAL; } + + if (link == IB_LINK_LAYER_ETHERNET) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return -EINVAL; } static void send_handler(struct ib_mad_agent *agent, @@ -1127,7 +1145,7 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, sg_list.addr = tun_qp->ring[index].map; sg_list.length = size; - sg_list.lkey = ctx->mr->lkey; + sg_list.lkey = ctx->pd->local_dma_lkey; recv_wr.next = NULL; recv_wr.sg_list = &sg_list; @@ -1166,10 +1184,11 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, - u8 *s_mac, struct ib_mad *mad) + u8 *s_mac, u16 vlan_id, struct ib_mad *mad) { struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_ud_wr wr; + struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; @@ -1238,24 +1257,27 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, list.addr = sqp->tx_ring[wire_tx_ix].buf.map; list.length = sizeof (struct mlx4_mad_snd_buf); - list.lkey = sqp_ctx->mr->lkey; - - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.pkey_index = wire_pkey_ix; - wr.wr.ud.remote_qkey = qkey; - wr.wr.ud.remote_qpn = remote_qpn; - wr.next = NULL; - wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; + list.lkey = sqp_ctx->pd->local_dma_lkey; + + wr.ah = ah; + wr.port_num = port; + wr.pkey_index = wire_pkey_ix; + wr.remote_qkey = qkey; + wr.remote_qpn = remote_qpn; + wr.wr.next = NULL; + wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); + wr.wr.sg_list = &list; + wr.wr.num_sge = 1; + wr.wr.opcode = IB_WR_SEND; + wr.wr.send_flags = IB_SEND_SIGNALED; if (s_mac) memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + if (vlan_id < 0x1000) + vlan_id |= (attr->sl & 7) << 13; + to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr, &bad_wr); + ret = ib_post_send(send_qp, &wr.wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); @@ -1289,6 +1311,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc u8 *slave_id; int slave; int port; + u16 vlan_id; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1365,19 +1388,22 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc * stadard address handle by decoding the tunnelled mlx4_ah fields */ memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); ah.ibah.device = ctx->ib_dev; + + port = be32_to_cpu(ah.av.ib.port_pd) >> 24; + port = mlx4_slave_convert_port(dev->dev, slave, port); + if (port < 0) + return; + ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff)); + mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (ah_attr.ah_flags & IB_AH_GRH) fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); - port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num); - if (port < 0) - return; - ah_attr.port_num = port; memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); - ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + vlan_id = be16_to_cpu(tunnel->hdr.vlan); /* if slave have default vlan use it */ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, - &ah_attr.vlan_id, &ah_attr.sl); + &vlan_id, &ah_attr.sl); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? @@ -1385,7 +1411,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, wc->smac, &tunnel->mad); + &ah_attr, wc->smac, vlan_id, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1773,6 +1799,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) { int ret, cq_size; + struct ib_cq_init_attr cq_attr = {}; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; @@ -1801,8 +1828,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, if (ctx->has_smi) cq_size *= 2; + cq_attr.cqe = cq_size; ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, - NULL, ctx, cq_size, 0); + NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); pr_err("Couldn't create tunnel CQ (%d)\n", ret); @@ -1816,19 +1844,12 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_cq; } - ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(ctx->mr)) { - ret = PTR_ERR(ctx->mr); - pr_err("Couldn't get tunnel DMA MR (%d)\n", ret); - goto err_pd; - } - if (ctx->has_smi) { ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun); if (ret) { pr_err("Couldn't create %s QP0 (%d)\n", create_tun ? "tunnel for" : "", ret); - goto err_mr; + goto err_pd; } } @@ -1865,10 +1886,6 @@ err_qp0: ib_destroy_qp(ctx->qp[0].qp); ctx->qp[0].qp = NULL; -err_mr: - ib_dereg_mr(ctx->mr); - ctx->mr = NULL; - err_pd: ib_dealloc_pd(ctx->pd); ctx->pd = NULL; @@ -1905,8 +1922,6 @@ static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port, ib_destroy_qp(ctx->qp[1].qp); ctx->qp[1].qp = NULL; mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1); - ib_dereg_mr(ctx->mr); - ctx->mr = NULL; ib_dealloc_pd(ctx->pd); ctx->pd = NULL; ib_destroy_cq(ctx->cq); @@ -2039,8 +2054,6 @@ static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx) ib_destroy_qp(sqp_ctx->qp[1].qp); sqp_ctx->qp[1].qp = NULL; mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0); - ib_dereg_mr(sqp_ctx->mr); - sqp_ctx->mr = NULL; ib_dealloc_pd(sqp_ctx->pd); sqp_ctx->pd = NULL; ib_destroy_cq(sqp_ctx->cq); diff --git a/kernel/drivers/infiniband/hw/mlx4/main.c b/kernel/drivers/infiniband/hw/mlx4/main.c index cc64400d4..97d6878f9 100644 --- a/kernel/drivers/infiniband/hw/mlx4/main.c +++ b/kernel/drivers/infiniband/hw/mlx4/main.c @@ -45,6 +45,9 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> + +#include <net/bonding.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/cmd.h> @@ -74,13 +77,6 @@ static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; -struct update_gid_work { - struct work_struct work; - union ib_gid gids[128]; - struct mlx4_ib_dev *dev; - int port; -}; - static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); static struct workqueue_struct *wq; @@ -93,8 +89,6 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static union ib_gid zgid; - static int check_flow_steering_support(struct mlx4_dev *dev) { int eth_num_ports = 0; @@ -131,15 +125,267 @@ static int num_ib_ports(struct mlx4_dev *dev) return ib_ports; } +static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num) +{ + struct mlx4_ib_dev *ibdev = to_mdev(device); + struct net_device *dev; + + rcu_read_lock(); + dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num); + + if (dev) { + if (mlx4_is_bonded(ibdev->dev)) { + struct net_device *upper = NULL; + + upper = netdev_master_upper_dev_get_rcu(dev); + if (upper) { + struct net_device *active; + + active = bond_option_active_slave_get_rcu(netdev_priv(upper)); + if (active) + dev = active; + } + } + } + if (dev) + dev_hold(dev); + + rcu_read_unlock(); + return dev; +} + +static int mlx4_ib_update_gids(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct mlx4_dev *dev = ibdev->dev; + int i; + union ib_gid *gid_tbl; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + gid_tbl = mailbox->buf; + + for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) + memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid)); + + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | port_num, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (mlx4_is_bonded(dev)) + err += mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | 2, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +static int mlx4_ib_add_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context) +{ + struct mlx4_ib_dev *ibdev = to_mdev(device); + struct mlx4_ib_iboe *iboe = &ibdev->iboe; + struct mlx4_port_gid_table *port_gid_table; + int free = -1, found = -1; + int ret = 0; + int hw_update = 0; + int i; + struct gid_entry *gids = NULL; + + if (!rdma_cap_roce_gid_table(device, port_num)) + return -EINVAL; + + if (port_num > MLX4_MAX_PORTS) + return -EINVAL; + + if (!context) + return -EINVAL; + + port_gid_table = &iboe->gids[port_num - 1]; + spin_lock_bh(&iboe->lock); + for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { + if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) { + found = i; + break; + } + if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid))) + free = i; /* HW has space */ + } + + if (found < 0) { + if (free < 0) { + ret = -ENOSPC; + } else { + port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC); + if (!port_gid_table->gids[free].ctx) { + ret = -ENOMEM; + } else { + *context = port_gid_table->gids[free].ctx; + memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); + port_gid_table->gids[free].ctx->real_index = free; + port_gid_table->gids[free].ctx->refcount = 1; + hw_update = 1; + } + } + } else { + struct gid_cache_context *ctx = port_gid_table->gids[found].ctx; + *context = ctx; + ctx->refcount++; + } + if (!ret && hw_update) { + gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC); + if (!gids) { + ret = -ENOMEM; + } else { + for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) + memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); + } + } + spin_unlock_bh(&iboe->lock); + + if (!ret && hw_update) { + ret = mlx4_ib_update_gids(gids, ibdev, port_num); + kfree(gids); + } + + return ret; +} + +static int mlx4_ib_del_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + void **context) +{ + struct gid_cache_context *ctx = *context; + struct mlx4_ib_dev *ibdev = to_mdev(device); + struct mlx4_ib_iboe *iboe = &ibdev->iboe; + struct mlx4_port_gid_table *port_gid_table; + int ret = 0; + int hw_update = 0; + struct gid_entry *gids = NULL; + + if (!rdma_cap_roce_gid_table(device, port_num)) + return -EINVAL; + + if (port_num > MLX4_MAX_PORTS) + return -EINVAL; + + port_gid_table = &iboe->gids[port_num - 1]; + spin_lock_bh(&iboe->lock); + if (ctx) { + ctx->refcount--; + if (!ctx->refcount) { + unsigned int real_index = ctx->real_index; + + memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid)); + kfree(port_gid_table->gids[real_index].ctx); + port_gid_table->gids[real_index].ctx = NULL; + hw_update = 1; + } + } + if (!ret && hw_update) { + int i; + + gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC); + if (!gids) { + ret = -ENOMEM; + } else { + for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) + memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); + } + } + spin_unlock_bh(&iboe->lock); + + if (!ret && hw_update) { + ret = mlx4_ib_update_gids(gids, ibdev, port_num); + kfree(gids); + } + return ret; +} + +int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, + u8 port_num, int index) +{ + struct mlx4_ib_iboe *iboe = &ibdev->iboe; + struct gid_cache_context *ctx = NULL; + union ib_gid gid; + struct mlx4_port_gid_table *port_gid_table; + int real_index = -EINVAL; + int i; + int ret; + unsigned long flags; + + if (port_num > MLX4_MAX_PORTS) + return -EINVAL; + + if (mlx4_is_bonded(ibdev->dev)) + port_num = 1; + + if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) + return index; + + ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL); + if (ret) + return ret; + + if (!memcmp(&gid, &zgid, sizeof(gid))) + return -EINVAL; + + spin_lock_irqsave(&iboe->lock, flags); + port_gid_table = &iboe->gids[port_num - 1]; + + for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) + if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) { + ctx = port_gid_table->gids[i].ctx; + break; + } + if (ctx) + real_index = ctx->real_index; + spin_unlock_irqrestore(&iboe->lock, flags); + return real_index; +} + static int mlx4_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; int have_ib_ports; + struct mlx4_uverbs_ex_query_device cmd; + struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; + struct mlx4_clock_params clock_params; + + if (uhw->inlen) { + if (uhw->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + if (cmd.reserved) + return -EINVAL; + } + + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -196,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } + props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->persist->pdev->device; @@ -208,6 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); + props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; props->max_mr = dev->dev->quotas.mpt; @@ -229,7 +478,25 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; + props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; + props->timestamp_mask = 0xFFFFFFFFFFFFULL; + if (!mlx4_is_slave(dev->dev)) + err = mlx4_get_internal_clock_params(dev->dev, &clock_params); + + if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { + resp.response_length += sizeof(resp.hca_core_clock_offset); + if (!err && !mlx4_is_slave(dev->dev)) { + resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; + } + } + + if (uhw->outlen) { + err = ib_copy_to_udata(uhw, &resp, resp.response_length); + if (err) + goto out; + } out: kfree(in_mad); kfree(out_mad); @@ -375,12 +642,13 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; - if (is_bonded) - rtnl_lock(); /* required to get upper dev */ spin_lock_bh(&iboe->lock); ndev = iboe->netdevs[port - 1]; - if (ndev && is_bonded) - ndev = netdev_master_upper_dev_get(ndev); + if (ndev && is_bonded) { + rcu_read_lock(); /* required to get upper dev */ + ndev = netdev_master_upper_dev_get_rcu(ndev); + rcu_read_unlock(); + } if (!ndev) goto out_unlock; @@ -392,8 +660,6 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->phys_state = state_to_phys_state(props->state); out_unlock: spin_unlock_bh(&iboe->lock); - if (is_bonded) - rtnl_unlock(); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; @@ -476,23 +742,27 @@ out: return err; } -static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - - *gid = dev->iboe.gid_table[port - 1][index]; - - return 0; -} - static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { - if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + int ret; + + if (rdma_protocol_ib(ibdev, port)) return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); - else - return iboe_query_gid(ibdev, port, index, gid); + + if (!rdma_protocol_roce(ibdev, port)) + return -ENODEV; + + if (!rdma_cap_roce_gid_table(ibdev, port)) + return -ENODEV; + + ret = ib_get_cached_gid(ibdev, port, index, gid, NULL); + if (ret == -EAGAIN) { + memcpy(gid, &zgid, sizeof(*gid)); + return 0; + } + + return ret; } int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, @@ -653,7 +923,7 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, resp.cqe_size = dev->dev->caps.cqe_size; } - context = kmalloc(sizeof *context, GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -690,21 +960,143 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) return 0; } +static void mlx4_ib_vma_open(struct vm_area_struct *area) +{ + /* vma_open is called when a new VMA is created on top of our VMA. + * This is done through either mremap flow or split_vma (usually due + * to mlock, madvise, munmap, etc.). We do not support a clone of the + * vma, as this VMA is strongly hardware related. Therefore we set the + * vm_ops of the newly created/cloned VMA to NULL, to prevent it from + * calling us again and trying to do incorrect actions. We assume that + * the original vma size is exactly a single page that there will be no + * "splitting" operations on. + */ + area->vm_ops = NULL; +} + +static void mlx4_ib_vma_close(struct vm_area_struct *area) +{ + struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data; + + /* It's guaranteed that all VMAs opened on a FD are closed before the + * file itself is closed, therefore no sync is needed with the regular + * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync + * with accessing the vma as part of mlx4_ib_disassociate_ucontext. + * The close operation is usually called under mm->mmap_sem except when + * process is exiting. The exiting case is handled explicitly as part + * of mlx4_ib_disassociate_ucontext. + */ + mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *) + area->vm_private_data; + + /* set the vma context pointer to null in the mlx4_ib driver's private + * data to protect against a race condition in mlx4_ib_dissassociate_ucontext(). + */ + mlx4_ib_vma_priv_data->vma = NULL; +} + +static const struct vm_operations_struct mlx4_ib_vm_ops = { + .open = mlx4_ib_vma_open, + .close = mlx4_ib_vma_close +}; + +static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + int i; + int ret = 0; + struct vm_area_struct *vma; + struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + /* make sure that task is dead before returning, it may + * prevent a rare case of module down in parallel to a + * call to mlx4_ib_vma_close. + */ + put_task_struct(owning_process); + msleep(1); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the task struct */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + /* need to protect from a race on closing the vma as part of + * mlx4_ib_vma_close(). + */ + down_read(&owning_mm->mmap_sem); + for (i = 0; i < HW_BAR_COUNT; i++) { + vma = context->hw_bar_info[i].vma; + if (!vma) + continue; + + ret = zap_vma_ptes(context->hw_bar_info[i].vma, + context->hw_bar_info[i].vma->vm_start, + PAGE_SIZE); + if (ret) { + pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret); + BUG_ON(1); + } + + /* context going to be destroyed, should not access ops any more */ + context->hw_bar_info[i].vma->vm_ops = NULL; + } + + up_read(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + +static void mlx4_ib_set_vma_data(struct vm_area_struct *vma, + struct mlx4_ib_vma_private_data *vma_private_data) +{ + vma_private_data->vma = vma; + vma->vm_private_data = vma_private_data; + vma->vm_ops = &mlx4_ib_vm_ops; +} + static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); + struct mlx4_ib_ucontext *mucontext = to_mucontext(context); if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; if (vma->vm_pgoff == 0) { + /* We prevent double mmaping on same context */ + if (mucontext->hw_bar_info[HW_BAR_DB].vma) + return -EINVAL; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; + + mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]); + } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { + /* We prevent double mmaping on same context */ + if (mucontext->hw_bar_info[HW_BAR_BF].vma) + return -EINVAL; + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, @@ -712,8 +1104,36 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - } else + + mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]); + + } else if (vma->vm_pgoff == 3) { + struct mlx4_clock_params params; + int ret; + + /* We prevent double mmaping on same context */ + if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma) + return -EINVAL; + + ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); + + if (ret) + return ret; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + (pci_resource_start(dev->dev->persist->pdev, + params.bar) + + params.offset) + >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + mlx4_ib_set_vma_data(vma, + &mucontext->hw_bar_info[HW_BAR_CLOCK]); + } else { return -EINVAL; + } return 0; } @@ -758,6 +1178,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; + struct ib_cq_init_attr cq_attr = {}; int err; if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) @@ -777,7 +1198,8 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, goto err2; } - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + cq_attr.cqe = 1; + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; @@ -827,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) return 0; } +static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev, + struct mlx4_ib_counters *ctr_table) +{ + struct counter_index *counter, *tmp_count; + + mutex_lock(&ctr_table->mutex); + list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list, + list) { + if (counter->allocated) + mlx4_counter_free(ibdev->dev, counter->index); + list_del(&counter->list); + kfree(counter); + } + mutex_unlock(&ctr_table->mutex); +} + int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { @@ -1090,7 +1528,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_NATIVE); + MLX4_CMD_WRAPPED); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1107,7 +1545,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_NATIVE); + MLX4_CMD_WRAPPED); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); @@ -1185,7 +1623,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; if (is_bonded) { /* Application always sees one port so the mirror rule * must be on port #2 @@ -1200,6 +1637,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } + i++; } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { @@ -1207,7 +1645,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; + if (is_bonded) { flow_attr->port = 2; err = mlx4_ib_tunnel_steer_add(qp, flow_attr, @@ -1218,6 +1656,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } /* function to create mirror rule */ + i++; } return &mflow->ibflow; @@ -1489,272 +1928,6 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; -static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, - struct net_device *dev) -{ - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - if (vlan_id < 0x1000) { - eui[3] = vlan_id >> 8; - eui[4] = vlan_id & 0xff; - } else { - eui[3] = 0xff; - eui[4] = 0xfe; - } - eui[0] ^= 2; -} - -static void update_gids_task(struct work_struct *work) -{ - struct update_gid_work *gw = container_of(work, struct update_gid_work, work); - struct mlx4_cmd_mailbox *mailbox; - union ib_gid *gids; - int err; - struct mlx4_dev *dev = gw->dev->dev; - int is_bonded = mlx4_is_bonded(dev); - - if (!gw->dev->ib_active) - return; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) { - pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); - return; - } - - gids = mailbox->buf; - memcpy(gids, gw->gids, sizeof gw->gids); - - err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, - MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); - if (err) - pr_warn("set port command failed\n"); - else - if ((gw->port == 1) || !is_bonded) - mlx4_ib_dispatch_event(gw->dev, - is_bonded ? 1 : gw->port, - IB_EVENT_GID_CHANGE); - - mlx4_free_cmd_mailbox(dev, mailbox); - kfree(gw); -} - -static void reset_gids_task(struct work_struct *work) -{ - struct update_gid_work *gw = - container_of(work, struct update_gid_work, work); - struct mlx4_cmd_mailbox *mailbox; - union ib_gid *gids; - int err; - struct mlx4_dev *dev = gw->dev->dev; - - if (!gw->dev->ib_active) - return; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) { - pr_warn("reset gid table failed\n"); - goto free; - } - - gids = mailbox->buf; - memcpy(gids, gw->gids, sizeof(gw->gids)); - - if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) == - IB_LINK_LAYER_ETHERNET) { - err = mlx4_cmd(dev, mailbox->dma, - MLX4_SET_PORT_GID_TABLE << 8 | gw->port, - MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); - if (err) - pr_warn("set port %d command failed\n", gw->port); - } - - mlx4_free_cmd_mailbox(dev, mailbox); -free: - kfree(gw); -} - -static int update_gid_table(struct mlx4_ib_dev *dev, int port, - union ib_gid *gid, int clear, - int default_gid) -{ - struct update_gid_work *work; - int i; - int need_update = 0; - int free = -1; - int found = -1; - int max_gids; - - if (default_gid) { - free = 0; - } else { - max_gids = dev->dev->caps.gid_table_len[port]; - for (i = 1; i < max_gids; ++i) { - if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, - sizeof(*gid))) - found = i; - - if (clear) { - if (found >= 0) { - need_update = 1; - dev->iboe.gid_table[port - 1][found] = - zgid; - break; - } - } else { - if (found >= 0) - break; - - if (free < 0 && - !memcmp(&dev->iboe.gid_table[port - 1][i], - &zgid, sizeof(*gid))) - free = i; - } - } - } - - if (found == -1 && !clear && free >= 0) { - dev->iboe.gid_table[port - 1][free] = *gid; - need_update = 1; - } - - if (!need_update) - return 0; - - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - - memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids)); - INIT_WORK(&work->work, update_gids_task); - work->port = port; - work->dev = dev; - queue_work(wq, &work->work); - - return 0; -} - -static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid) -{ - gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); - mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); -} - - -static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port) -{ - struct update_gid_work *work; - - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - - memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids)); - memset(work->gids, 0, sizeof(work->gids)); - INIT_WORK(&work->work, reset_gids_task); - work->dev = dev; - work->port = port; - queue_work(wq, &work->work); - return 0; -} - -static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, - struct mlx4_ib_dev *ibdev, union ib_gid *gid) -{ - struct mlx4_ib_iboe *iboe; - int port = 0; - struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? - rdma_vlan_dev_real_dev(event_netdev) : - event_netdev; - union ib_gid default_gid; - - mlx4_make_default_gid(real_dev, &default_gid); - - if (!memcmp(gid, &default_gid, sizeof(*gid))) - return 0; - - if (event != NETDEV_DOWN && event != NETDEV_UP) - return 0; - - if ((real_dev != event_netdev) && - (event == NETDEV_DOWN) && - rdma_link_local_addr((struct in6_addr *)gid)) - return 0; - - iboe = &ibdev->iboe; - spin_lock_bh(&iboe->lock); - - for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) - if ((netif_is_bond_master(real_dev) && - (real_dev == iboe->masters[port - 1])) || - (!netif_is_bond_master(real_dev) && - (real_dev == iboe->netdevs[port - 1]))) - update_gid_table(ibdev, port, gid, - event == NETDEV_DOWN, 0); - - spin_unlock_bh(&iboe->lock); - return 0; - -} - -static u8 mlx4_ib_get_dev_port(struct net_device *dev, - struct mlx4_ib_dev *ibdev) -{ - u8 port = 0; - struct mlx4_ib_iboe *iboe; - struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ? - rdma_vlan_dev_real_dev(dev) : dev; - - iboe = &ibdev->iboe; - - for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) - if ((netif_is_bond_master(real_dev) && - (real_dev == iboe->masters[port - 1])) || - (!netif_is_bond_master(real_dev) && - (real_dev == iboe->netdevs[port - 1]))) - break; - - if ((port == 0) || (port > ibdev->dev->caps.num_ports)) - return 0; - else - return port; -} - -static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct mlx4_ib_dev *ibdev; - struct in_ifaddr *ifa = ptr; - union ib_gid gid; - struct net_device *event_netdev = ifa->ifa_dev->dev; - - ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); - - ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet); - - mlx4_ib_addr_event(event, event_netdev, ibdev, &gid); - return NOTIFY_DONE; -} - -#if IS_ENABLED(CONFIG_IPV6) -static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct mlx4_ib_dev *ibdev; - struct inet6_ifaddr *ifa = ptr; - union ib_gid *gid = (union ib_gid *)&ifa->addr; - struct net_device *event_netdev = ifa->idev->dev; - - ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6); - - mlx4_ib_addr_event(event, event_netdev, ibdev, gid); - return NOTIFY_DONE; -} -#endif - #define MLX4_IB_INVALID_MAC ((u64)-1) static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, struct net_device *dev, @@ -1813,94 +1986,6 @@ unlock: mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); } -static void mlx4_ib_get_dev_addr(struct net_device *dev, - struct mlx4_ib_dev *ibdev, u8 port) -{ - struct in_device *in_dev; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_dev *in6_dev; - union ib_gid *pgid; - struct inet6_ifaddr *ifp; - union ib_gid default_gid; -#endif - union ib_gid gid; - - - if ((port == 0) || (port > ibdev->dev->caps.num_ports)) - return; - - /* IPv4 gids */ - in_dev = in_dev_get(dev); - if (in_dev) { - for_ifa(in_dev) { - /*ifa->ifa_address;*/ - ipv6_addr_set_v4mapped(ifa->ifa_address, - (struct in6_addr *)&gid); - update_gid_table(ibdev, port, &gid, 0, 0); - } - endfor_ifa(in_dev); - in_dev_put(in_dev); - } -#if IS_ENABLED(CONFIG_IPV6) - mlx4_make_default_gid(dev, &default_gid); - /* IPv6 gids */ - in6_dev = in6_dev_get(dev); - if (in6_dev) { - read_lock_bh(&in6_dev->lock); - list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - pgid = (union ib_gid *)&ifp->addr; - if (!memcmp(pgid, &default_gid, sizeof(*pgid))) - continue; - update_gid_table(ibdev, port, pgid, 0, 0); - } - read_unlock_bh(&in6_dev->lock); - in6_dev_put(in6_dev); - } -#endif -} - -static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev, - struct net_device *dev, u8 port) -{ - union ib_gid gid; - mlx4_make_default_gid(dev, &gid); - update_gid_table(ibdev, port, &gid, 0, 1); -} - -static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) -{ - struct net_device *dev; - struct mlx4_ib_iboe *iboe = &ibdev->iboe; - int i; - int err = 0; - - for (i = 1; i <= ibdev->num_ports; ++i) { - if (rdma_port_get_link_layer(&ibdev->ib_dev, i) == - IB_LINK_LAYER_ETHERNET) { - err = reset_gid_table(ibdev, i); - if (err) - goto out; - } - } - - read_lock(&dev_base_lock); - spin_lock_bh(&iboe->lock); - - for_each_netdev(&init_net, dev) { - u8 port = mlx4_ib_get_dev_port(dev, ibdev); - /* port will be non-zero only for ETH ports */ - if (port) { - mlx4_ib_set_default_gid(ibdev, dev, port); - mlx4_ib_get_dev_addr(dev, ibdev, port); - } - } - - spin_unlock_bh(&iboe->lock); - read_unlock(&dev_base_lock); -out: - return err; -} - static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device *dev, unsigned long event) @@ -1910,81 +1995,22 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, int update_qps_port = -1; int port; + ASSERT_RTNL(); + iboe = &ibdev->iboe; spin_lock_bh(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { - enum ib_port_state port_state = IB_PORT_NOP; - struct net_device *old_master = iboe->masters[port - 1]; - struct net_device *curr_netdev; - struct net_device *curr_master; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); - if (iboe->netdevs[port - 1]) - mlx4_ib_set_default_gid(ibdev, - iboe->netdevs[port - 1], port); - curr_netdev = iboe->netdevs[port - 1]; - - if (iboe->netdevs[port - 1] && - netif_is_bond_slave(iboe->netdevs[port - 1])) { - iboe->masters[port - 1] = netdev_master_upper_dev_get( - iboe->netdevs[port - 1]); - } else { - iboe->masters[port - 1] = NULL; - } - curr_master = iboe->masters[port - 1]; if (dev == iboe->netdevs[port - 1] && (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || event == NETDEV_UP || event == NETDEV_CHANGE)) update_qps_port = port; - if (curr_netdev) { - port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? - IB_PORT_ACTIVE : IB_PORT_DOWN; - mlx4_ib_set_default_gid(ibdev, curr_netdev, port); - if (curr_master) { - /* if using bonding/team and a slave port is down, we - * don't want the bond IP based gids in the table since - * flows that select port by gid may get the down port. - */ - if (port_state == IB_PORT_DOWN && - !mlx4_is_bonded(ibdev->dev)) { - reset_gid_table(ibdev, port); - mlx4_ib_set_default_gid(ibdev, - curr_netdev, - port); - } else { - /* gids from the upper dev (bond/team) - * should appear in port's gid table - */ - mlx4_ib_get_dev_addr(curr_master, - ibdev, port); - } - } - /* if bonding is used it is possible that we add it to - * masters only after IP address is assigned to the - * net bonding interface. - */ - if (curr_master && (old_master != curr_master)) { - reset_gid_table(ibdev, port); - mlx4_ib_set_default_gid(ibdev, - curr_netdev, port); - mlx4_ib_get_dev_addr(curr_master, ibdev, port); - } - - if (!curr_master && (old_master != curr_master)) { - reset_gid_table(ibdev, port); - mlx4_ib_set_default_gid(ibdev, - curr_netdev, port); - mlx4_ib_get_dev_addr(curr_netdev, ibdev, port); - } - } else { - reset_gid_table(ibdev, port); - } } - spin_unlock_bh(&iboe->lock); if (update_qps_port > 0) @@ -2041,77 +2067,75 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[80]; - int eq_per_port = 0; - int added_eqs = 0; - int total_eqs = 0; - int i, j, eq; + int i, j, eq = 0, total_eqs = 0; - /* Legacy mode or comp_pool is not large enough */ - if (dev->caps.comp_pool == 0 || - dev->caps.num_ports > dev->caps.comp_pool) - return; - - eq_per_port = dev->caps.comp_pool / dev->caps.num_ports; - - /* Init eq table */ - added_eqs = 0; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) - added_eqs += eq_per_port; - - total_eqs = dev->caps.num_comp_vectors + added_eqs; - - ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); + ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors, + sizeof(ibdev->eq_table[0]), GFP_KERNEL); if (!ibdev->eq_table) return; - ibdev->eq_added = added_eqs; - - eq = 0; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { - for (j = 0; j < eq_per_port; j++) { - snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->persist->pdev->bus->name); - /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq])) { - /* Use legacy (same as mlx4_en driver) */ - pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); - ibdev->eq_table[eq] = - (eq % dev->caps.num_comp_vectors); - } - eq++; + for (i = 1; i <= dev->caps.num_ports; i++) { + for (j = 0; j < mlx4_get_eqs_per_port(dev, i); + j++, total_eqs++) { + if (i > 1 && mlx4_is_eq_shared(dev, total_eqs)) + continue; + ibdev->eq_table[eq] = total_eqs; + if (!mlx4_assign_eq(dev, i, + &ibdev->eq_table[eq])) + eq++; + else + ibdev->eq_table[eq] = -1; } } - /* Fill the reset of the vector with legacy EQ */ - for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) - ibdev->eq_table[eq++] = i; + for (i = eq; i < dev->caps.num_comp_vectors; + ibdev->eq_table[i++] = -1) + ; /* Advertise the new number of EQs to clients */ - ibdev->ib_dev.num_comp_vectors = total_eqs; + ibdev->ib_dev.num_comp_vectors = eq; } static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; + int total_eqs = ibdev->ib_dev.num_comp_vectors; - /* no additional eqs were added */ + /* no eqs were allocated */ if (!ibdev->eq_table) return; /* Reset the advertised EQ number */ - ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; + ibdev->ib_dev.num_comp_vectors = 0; - /* Free only the added eqs */ - for (i = 0; i < ibdev->eq_added; i++) { - /* Don't free legacy eqs if used */ - if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) - continue; + for (i = 0; i < total_eqs; i++) mlx4_release_eq(dev, ibdev->eq_table[i]); - } kfree(ibdev->eq_table); + ibdev->eq_table = NULL; +} + +static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mlx4_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + else + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; } static void *mlx4_ib_add(struct mlx4_dev *dev) @@ -2123,6 +2147,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) struct mlx4_ib_iboe *iboe; int ib_num_ports = 0; int num_req_counters; + int allocated; + u32 counter_index; + struct counter_index *new_counter_index = NULL; pr_info_once("%s", mlx4_ib_version); @@ -2167,6 +2194,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; + ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; + ibdev->ib_dev.add_gid = mlx4_ib_add_gid; + ibdev->ib_dev.del_gid = mlx4_ib_del_gid; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2235,12 +2265,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; - ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; - ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; - ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; + ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; + ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; + ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; + ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; @@ -2278,6 +2309,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); } + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); + mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2285,22 +2321,58 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; + for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->counters_table[i].mutex); + INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + } + num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; for (i = 0; i < num_req_counters; ++i) { mutex_init(&ibdev->qp1_proxy_lock[i]); + allocated = 0; if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { - err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); + err = mlx4_counter_alloc(ibdev->dev, &counter_index); + /* if failed to allocate a new counter, use default */ if (err) - ibdev->counters[i] = -1; - } else { - ibdev->counters[i] = -1; + counter_index = + mlx4_get_default_counter_index(dev, + i + 1); + else + allocated = 1; + } else { /* IB_LINK_LAYER_INFINIBAND use the default counter */ + counter_index = mlx4_get_default_counter_index(dev, + i + 1); } + new_counter_index = kmalloc(sizeof(*new_counter_index), + GFP_KERNEL); + if (!new_counter_index) { + if (allocated) + mlx4_counter_free(ibdev->dev, counter_index); + goto err_counter; + } + new_counter_index->index = counter_index; + new_counter_index->allocated = allocated; + list_add_tail(&new_counter_index->list, + &ibdev->counters_table[i].counters_list); + ibdev->counters_table[i].default_counter = counter_index; + pr_info("counter index %d for port %d allocated %d\n", + counter_index, i + 1, allocated); } if (mlx4_is_bonded(dev)) - for (i = 1; i < ibdev->num_ports ; ++i) - ibdev->counters[i] = ibdev->counters[0]; - + for (i = 1; i < ibdev->num_ports ; ++i) { + new_counter_index = + kmalloc(sizeof(struct counter_index), + GFP_KERNEL); + if (!new_counter_index) + goto err_counter; + new_counter_index->index = counter_index; + new_counter_index->allocated = 0; + list_add_tail(&new_counter_index->list, + &ibdev->counters_table[i].counters_list); + ibdev->counters_table[i].default_counter = + counter_index; + } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ib_num_ports++; @@ -2360,26 +2432,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_notif; } } - if (!iboe->nb_inet.notifier_call) { - iboe->nb_inet.notifier_call = mlx4_ib_inet_event; - err = register_inetaddr_notifier(&iboe->nb_inet); - if (err) { - iboe->nb_inet.notifier_call = NULL; - goto err_notif; - } - } -#if IS_ENABLED(CONFIG_IPV6) - if (!iboe->nb_inet6.notifier_call) { - iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event; - err = register_inet6addr_notifier(&iboe->nb_inet6); - if (err) { - iboe->nb_inet6.notifier_call = NULL; - goto err_notif; - } - } -#endif - if (mlx4_ib_init_gid_table(ibdev)) - goto err_notif; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -2410,18 +2462,6 @@ err_notif: pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } - if (ibdev->iboe.nb_inet.notifier_call) { - if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) - pr_warn("failure unregistering notifier\n"); - ibdev->iboe.nb_inet.notifier_call = NULL; - } -#if IS_ENABLED(CONFIG_IPV6) - if (ibdev->iboe.nb_inet6.notifier_call) { - if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) - pr_warn("failure unregistering notifier\n"); - ibdev->iboe.nb_inet6.notifier_call = NULL; - } -#endif flush_workqueue(wq); mlx4_ib_close_sriov(ibdev); @@ -2440,9 +2480,8 @@ err_steer_qp_release: mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: - for (; i; --i) - if (ibdev->counters[i - 1] != -1) - mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); + for (i = 0; i < ibdev->num_ports; ++i) + mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); err_map: iounmap(ibdev->uar_map); @@ -2545,23 +2584,10 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) kfree(ibdev->ib_uc_qpns_bitmap); } - if (ibdev->iboe.nb_inet.notifier_call) { - if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) - pr_warn("failure unregistering notifier\n"); - ibdev->iboe.nb_inet.notifier_call = NULL; - } -#if IS_ENABLED(CONFIG_IPV6) - if (ibdev->iboe.nb_inet6.notifier_call) { - if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) - pr_warn("failure unregistering notifier\n"); - ibdev->iboe.nb_inet6.notifier_call = NULL; - } -#endif - iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) - if (ibdev->counters[p] != -1) - mlx4_counter_free(ibdev->dev, ibdev->counters[p]); + mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]); + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); @@ -2592,31 +2618,33 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); - goto out; + return; } for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); - for (i = 0; i < dev->caps.num_ports; i++) { - if (dm[i]) - kfree(dm[i]); - } + while (--i >= 0) + kfree(dm[i]); goto out; } - } - /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; - spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); - if (!ibdev->sriov.is_going_down) + } + /* initialize or tear down tunnel QPs for the slave */ + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) { + for (i = 0; i < ports; i++) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } else { + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + for (i = 0; i < ports; i++) + kfree(dm[i]); } out: kfree(dm); diff --git a/kernel/drivers/infiniband/hw/mlx4/mcg.c b/kernel/drivers/infiniband/hw/mlx4/mcg.c index a0559a8af..99451d887 100644 --- a/kernel/drivers/infiniband/hw/mlx4/mcg.c +++ b/kernel/drivers/infiniband/hw/mlx4/mcg.c @@ -51,6 +51,10 @@ pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\ (group)->name, group->demux->port, ## arg) +#define mcg_debug_group(group, format, arg...) \ + pr_debug("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\ + (group)->name, (group)->demux->port, ## arg) + #define mcg_error_group(group, format, arg...) \ pr_err(" %16s: " format, (group)->name, ## arg) @@ -218,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) spin_unlock_irqrestore(&dev->sm_lock, flags); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, - &ah_attr, NULL, mad); + &ah_attr, NULL, 0xffff, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, @@ -962,8 +966,8 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, mutex_lock(&group->lock); if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) { mutex_unlock(&group->lock); - mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n", - port, slave, MAX_PEND_REQS_PER_FUNC); + mcg_debug_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n", + port, slave, MAX_PEND_REQS_PER_FUNC); release_group(group, 0); kfree(req); return -ENOMEM; diff --git a/kernel/drivers/infiniband/hw/mlx4/mlx4_ib.h b/kernel/drivers/infiniband/hw/mlx4/mlx4_ib.h index fce393437..1caa11eda 100644 --- a/kernel/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/kernel/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -70,11 +70,24 @@ extern int mlx4_ib_sm_guid_assign; #define MLX4_IB_UC_STEER_QPN_ALIGN 1 #define MLX4_IB_UC_MAX_NUM_QPS 256 + +enum hw_bar_type { + HW_BAR_BF, + HW_BAR_DB, + HW_BAR_CLOCK, + HW_BAR_COUNT +}; + +struct mlx4_ib_vma_private_data { + struct vm_area_struct *vma; +}; + struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; struct list_head db_page_list; struct mutex db_page_mutex; + struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; }; struct mlx4_ib_pd { @@ -110,15 +123,23 @@ struct mlx4_ib_cq { struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; + int create_flags; /* List of qps that it serves.*/ struct list_head send_qp_list; struct list_head recv_qp_list; }; +#define MLX4_MR_PAGES_ALIGN 0x40 + struct mlx4_ib_mr { struct ib_mr ibmr; + __be64 *pages; + dma_addr_t page_map; + u32 npages; + u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; + void *pages_alloc; }; struct mlx4_ib_mw { @@ -126,12 +147,6 @@ struct mlx4_ib_mw { struct mlx4_mw mmw; }; -struct mlx4_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - __be64 *mapped_page_list; - dma_addr_t map; -}; - struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; @@ -306,6 +321,7 @@ struct mlx4_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; + struct counter_index *counter_index; }; struct mlx4_ib_srq { @@ -414,7 +430,6 @@ struct mlx4_ib_demux_pv_ctx { struct ib_device *ib_dev; struct ib_cq *cq; struct ib_pd *pd; - struct ib_mr *mr; struct work_struct work; struct workqueue_struct *wq; struct mlx4_ib_demux_pv_qp qp[2]; @@ -456,15 +471,26 @@ struct mlx4_ib_sriov { struct idr pv_id_table; }; +struct gid_cache_context { + int real_index; + int refcount; +}; + +struct gid_entry { + union ib_gid gid; + struct gid_cache_context *ctx; +}; + +struct mlx4_port_gid_table { + struct gid_entry gids[MLX4_MAX_PORT_GIDS]; +}; + struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; - struct net_device *masters[MLX4_MAX_PORTS]; atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; - struct notifier_block nb_inet; - struct notifier_block nb_inet6; - union ib_gid gid_table[MLX4_MAX_PORTS][128]; + struct mlx4_port_gid_table gids[MLX4_MAX_PORTS]; }; struct pkey_mgt { @@ -503,6 +529,18 @@ struct mlx4_ib_iov_port { struct mlx4_ib_iov_sysfs_attr mcg_dentry; }; +struct counter_index { + struct list_head list; + u32 index; + u8 allocated; +}; + +struct mlx4_ib_counters { + struct list_head counters_list; + struct mutex mutex; /* mutex for accessing counters list */ + u32 default_counter; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -521,9 +559,8 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; struct mlx4_ib_iboe iboe; - int counters[MLX4_MAX_PORTS]; + struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS]; int *eq_table; - int eq_added; struct kobject *iov_parent; struct kobject *ports_parent; struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; @@ -555,6 +592,21 @@ struct mlx4_ib_qp_tunnel_init_attr { u8 port; }; +struct mlx4_uverbs_ex_query_device { + __u32 comp_mask; + __u32 reserved; +}; + +enum query_device_resp_mask { + QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0, +}; + +struct mlx4_uverbs_ex_query_device_resp { + __u32 comp_mask; + __u32 response_length; + __u64 hca_core_clock_offset; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -595,11 +647,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) return container_of(ibmw, struct mlx4_ib_mw, ibmw); } -static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) -{ - return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); -} - static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); @@ -660,15 +707,16 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); int mlx4_ib_dealloc_mw(struct ib_mw *mw); -struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, - int max_page_list_len); -struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len); -void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); - +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int mlx4_ib_destroy_cq(struct ib_cq *cq); @@ -706,11 +754,13 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); @@ -766,7 +816,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, - struct ib_mad *mad); + u16 vlan_id, struct ib_mad *mad); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); @@ -815,5 +865,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); +int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, + u8 port_num, int index); #endif /* MLX4_IB_H */ diff --git a/kernel/drivers/infiniband/hw/mlx4/mr.c b/kernel/drivers/infiniband/hw/mlx4/mr.c index e0d271782..4d1e1c632 100644 --- a/kernel/drivers/infiniband/hw/mlx4/mr.c +++ b/kernel/drivers/infiniband/hw/mlx4/mr.c @@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) struct mlx4_ib_mr *mr; int err; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int err; int n; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -271,11 +271,59 @@ release_mpt_entry: return err; } +static int +mlx4_alloc_priv_pages(struct ib_device *device, + struct mlx4_ib_mr *mr, + int max_pages) +{ + int size = max_pages * sizeof(u64); + int add_size; + int ret; + + add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + + mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); + if (!mr->pages_alloc) + return -ENOMEM; + + mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); + + mr->page_map = dma_map_single(device->dma_device, mr->pages, + size, DMA_TO_DEVICE); + + if (dma_mapping_error(device->dma_device, mr->page_map)) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + kfree(mr->pages_alloc); + + return ret; +} + +static void +mlx4_free_priv_pages(struct mlx4_ib_mr *mr) +{ + if (mr->pages) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_pages * sizeof(u64); + + dma_unmap_single(device->dma_device, mr->page_map, + size, DMA_TO_DEVICE); + kfree(mr->pages_alloc); + mr->pages = NULL; + } +} + int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int ret; + mlx4_free_priv_pages(mr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); if (ret) return ret; @@ -321,21 +369,21 @@ err_free: int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { - struct ib_send_wr wr; + struct ib_bind_mw_wr wr; struct ib_send_wr *bad_wr; int ret; memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_BIND_MW; - wr.wr_id = mw_bind->wr_id; - wr.send_flags = mw_bind->send_flags; - wr.wr.bind_mw.mw = mw; - wr.wr.bind_mw.bind_info = mw_bind->bind_info; - wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); - - ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + wr.wr.opcode = IB_WR_BIND_MW; + wr.wr.wr_id = mw_bind->wr_id; + wr.wr.send_flags = mw_bind->send_flags; + wr.mw = mw; + wr.bind_info = mw_bind->bind_info; + wr.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr); if (!ret) - mw->rkey = wr.wr.bind_mw.rkey; + mw->rkey = wr.rkey; return ret; } @@ -350,87 +398,51 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) return 0; } -struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, - int max_page_list_len) +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; int err; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > MLX4_MAX_FAST_REG_PAGES) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, - max_page_list_len, 0, &mr->mmr); + max_num_sg, 0, &mr->mmr); if (err) goto err_free; + err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg); + if (err) + goto err_free_mr; + + mr->max_pages = max_num_sg; + err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) - goto err_mr; + goto err_free_pl; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; -err_mr: +err_free_pl: + mlx4_free_priv_pages(mr); +err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); - err_free: kfree(mr); return ERR_PTR(err); } -struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_fast_reg_page_list *mfrpl; - int size = page_list_len * sizeof (u64); - - if (page_list_len > MLX4_MAX_FAST_REG_PAGES) - return ERR_PTR(-EINVAL); - - mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); - if (!mfrpl) - return ERR_PTR(-ENOMEM); - - mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); - if (!mfrpl->ibfrpl.page_list) - goto err_free; - - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> - pdev->dev, - size, &mfrpl->map, - GFP_KERNEL); - if (!mfrpl->mapped_page_list) - goto err_free; - - WARN_ON(mfrpl->map & 0x3f); - - return &mfrpl->ibfrpl; - -err_free: - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); - return ERR_PTR(-ENOMEM); -} - -void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - struct mlx4_ib_dev *dev = to_mdev(page_list->device); - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); - int size = page_list->max_page_list_len * sizeof (u64); - - dma_free_coherent(&dev->dev->persist->pdev->dev, size, - mfrpl->mapped_page_list, - mfrpl->map); - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); -} - struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { @@ -523,3 +535,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) return err; } + +static int mlx4_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + + if (unlikely(mr->npages == mr->max_pages)) + return -ENOMEM; + + mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT); + + return 0; +} + +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + int rc; + + mr->npages = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, + sizeof(u64) * mr->max_pages, + DMA_TO_DEVICE); + + rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page); + + ib_dma_sync_single_for_device(ibmr->device, mr->page_map, + sizeof(u64) * mr->max_pages, + DMA_TO_DEVICE); + + return rc; +} diff --git a/kernel/drivers/infiniband/hw/mlx4/qp.c b/kernel/drivers/infiniband/hw/mlx4/qp.c index 02fc91c68..13eaaf452 100644 --- a/kernel/drivers/infiniband/hw/mlx4/qp.c +++ b/kernel/drivers/infiniband/hw/mlx4/qp.c @@ -34,6 +34,7 @@ #include <linux/log2.h> #include <linux/slab.h> #include <linux/netdevice.h> +#include <linux/vmalloc.h> #include <rdma/ib_cache.h> #include <rdma/ib_pack.h> @@ -111,7 +112,7 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), - [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), @@ -617,6 +618,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) return 0; } +static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp) +{ + mutex_lock(&dev->counters_table[qp->port - 1].mutex); + mlx4_counter_free(dev->dev, qp->counter_index->index); + list_del(&qp->counter_index->list); + mutex_unlock(&dev->counters_table[qp->port - 1].mutex); + + kfree(qp->counter_index); + qp->counter_index = NULL; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, @@ -746,9 +759,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; @@ -786,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp); + if (!qp->sq.wrid) + qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), + gfp, PAGE_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp); + if (!qp->rq.wrid) + qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), + gfp, PAGE_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -822,6 +838,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -874,8 +893,8 @@ err_wrid: if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { - kfree(qp->sq.wrid); - kfree(qp->rq.wrid); + kvfree(qp->sq.wrid); + kvfree(qp->rq.wrid); } err_mtt: @@ -1050,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, &qp->db); ib_umem_release(qp->umem); } else { - kfree(qp->sq.wrid); - kfree(qp->rq.wrid); + kvfree(qp->sq.wrid); + kvfree(qp->rq.wrid); if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) free_proxy_bufs(&dev->ib_dev, qp); @@ -1086,6 +1105,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, { struct mlx4_ib_qp *qp = NULL; int err; + int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; gfp_t gfp; @@ -1109,8 +1129,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, } if (init_attr->create_flags && - (udata || - ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && + ((udata && init_attr->create_flags & ~(sup_u_create_flags)) || + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | + MLX4_IB_QP_CREATE_USE_GFP_NOIO | + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) && init_attr->qp_type != IB_QPT_UD) || ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) @@ -1189,6 +1211,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); } + if (mqp->counter_index) + mlx4_ib_free_qp_counter(dev, mqp); + pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); @@ -1292,14 +1317,18 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->static_rate = 0; if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { + int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev, + port, + ah->grh.sgid_index); + + if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) { pr_err("sgid_index (%u) too large. max is %d\n", - ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); + real_sgid_index, dev->dev->caps.gid_table_len[port] - 1); return -1; } path->grh_mylmc |= 1 << 7; - path->mgid_index = ah->grh.sgid_index; + path->mgid_index = real_sgid_index; path->hop_limit = ah->grh.hop_limit; path->tclass_flowlabel = cpu_to_be32((ah->grh.traffic_class << 20) | @@ -1387,11 +1416,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, struct mlx4_ib_qp *mqp, - struct mlx4_qp_path *path, u8 port) + struct mlx4_qp_path *path, u8 port, + u16 vlan_id, u8 *smac) { return _mlx4_set_path(dev, &qp->ah_attr, - mlx4_mac_to_u64((u8 *)qp->smac), - (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, + mlx4_mac_to_u64(smac), + vlan_id, path, &mqp->pri, port); } @@ -1402,9 +1432,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->alt_ah_attr, - mlx4_mac_to_u64((u8 *)qp->alt_smac), - (qp_attr_mask & IB_QP_ALT_VID) ? - qp->alt_vlan_id : 0xffff, + 0, + 0xffff, path, &mqp->alt, port); } @@ -1420,7 +1449,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } -static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp, struct mlx4_qp_context *context) { u64 u64_mac; @@ -1443,6 +1473,40 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp * return 0; } +static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct counter_index *new_counter_index; + int err; + u32 tmp_idx; + + if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) != + IB_LINK_LAYER_ETHERNET || + !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) || + !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) + return 0; + + err = mlx4_counter_alloc(dev->dev, &tmp_idx); + if (err) + return err; + + new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL); + if (!new_counter_index) { + mlx4_counter_free(dev->dev, tmp_idx); + return -ENOMEM; + } + + new_counter_index->index = tmp_idx; + new_counter_index->allocated = 1; + qp->counter_index = new_counter_index; + + mutex_lock(&dev->counters_table[qp->port - 1].mutex); + list_add_tail(&new_counter_index->list, + &dev->counters_table[qp->port - 1].counters_list); + mutex_unlock(&dev->counters_table[qp->port - 1].mutex); + + return 0; +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1456,6 +1520,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int sqd_event; int steer_qp = 0; int err = -EINVAL; + int counter_index; /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && @@ -1515,6 +1580,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; + if (new_state == IB_QPS_RESET && qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); @@ -1539,12 +1607,27 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { - if (dev->counters[qp->port - 1] != -1) { - context->pri_path.counter_index = - dev->counters[qp->port - 1]; + err = create_qp_lb_counter(dev, qp); + if (err) + goto out; + + counter_index = + dev->counters_table[qp->port - 1].default_counter; + if (qp->counter_index) + counter_index = qp->counter_index->index; + + if (counter_index != -1) { + context->pri_path.counter_index = counter_index; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; + if (qp->counter_index) { + context->pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.vlan_control |= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } } else - context->pri_path.counter_index = 0xff; + context->pri_path.counter_index = + MLX4_SINK_COUNTER_INDEX(dev->dev); if (qp->flags & MLX4_IB_QP_NETIF) { mlx4_ib_steer_qp_reg(dev, qp, 1); @@ -1560,9 +1643,33 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { + u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : + attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + union ib_gid gid; + struct ib_gid_attr gid_attr; + u16 vlan = 0xffff; + u8 smac[ETH_ALEN]; + int status = 0; + + if (rdma_cap_eth_ah(&dev->ib_dev, port_num) && + attr->ah_attr.ah_flags & IB_AH_GRH) { + int index = attr->ah_attr.grh.sgid_index; + + status = ib_get_cached_gid(ibqp->device, port_num, + index, &gid, &gid_attr); + if (!status && !memcmp(&gid, &zgid, sizeof(gid))) + status = -ENOENT; + if (!status && gid_attr.ndev) { + vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); + memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); + } + } + if (status) + goto out; + if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, - attr_mask & IB_QP_PORT ? - attr->port_num : qp->port)) + port_num, vlan, smac)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | @@ -1699,7 +1806,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { - err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); + err = handle_eth_ud_smac_index(dev, qp, context); if (err) { err = -EINVAL; goto out; @@ -1843,6 +1950,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } out: + if (err && qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); @@ -2031,14 +2140,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_send_wr *wr, + struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); struct ib_device *ib_dev = &mdev->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); u16 pkey; u32 qkey; int send_size; @@ -2046,13 +2155,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, int spc; int i; - if (wr->opcode != IB_WR_SEND) + if (wr->wr.opcode != IB_WR_SEND) return -EINVAL; send_size = 0; - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; + for (i = 0; i < wr->wr.num_sge; ++i) + send_size += wr->wr.sg_list[i].length; /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ @@ -2077,11 +2186,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, mlx->rlid = sqp->ud_header.lrh.destination_lid; sqp->ud_header.lrh.virtual_lane = 0; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); @@ -2153,14 +2262,14 @@ static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac) } } -static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, +static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); union ib_gid sgid; u16 pkey; int send_size; @@ -2174,8 +2283,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, bool is_grh; send_size = 0; - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; + for (i = 0; i < wr->wr.num_sge; ++i) + send_size += wr->wr.sg_list[i].length; is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); @@ -2192,7 +2301,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sgid); + ah->av.ib.gid_index, &sgid, + NULL); + if (!err && !memcmp(&sgid, &zgid, sizeof(sgid))) + err = -ENOENT; if (err) return err; } @@ -2234,7 +2346,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid); + &sqp->ud_header.grh.source_gid, NULL); } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); @@ -2252,7 +2364,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, mlx->rlid = sqp->ud_header.lrh.destination_lid; } - switch (wr->opcode) { + switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -2260,7 +2372,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->ex.imm_data; + sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; @@ -2303,16 +2415,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else - ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); + ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? + sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); @@ -2400,43 +2512,39 @@ static __be32 convert_access(int acc) cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } -static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, + struct ib_reg_wr *wr) { - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); - int i; - - for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) - mfrpl->mapped_page_list[i] = - cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | - MLX4_MTT_FLAG_PRESENT); + struct mlx4_ib_mr *mr = to_mmr(wr->mr); - fseg->flags = convert_access(wr->wr.fast_reg.access_flags); - fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); - fseg->buf_list = cpu_to_be64(mfrpl->map); - fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->flags = convert_access(wr->access); + fseg->mem_key = cpu_to_be32(wr->key); + fseg->buf_list = cpu_to_be64(mr->page_map); + fseg->start_addr = cpu_to_be64(mr->ibmr.iova); + fseg->reg_len = cpu_to_be64(mr->ibmr.length); fseg->offset = 0; /* XXX -- is this just for ZBVA? */ - fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size)); fseg->reserved[0] = 0; fseg->reserved[1] = 0; } -static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, + struct ib_bind_mw_wr *wr) { bseg->flags1 = - convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + convert_access(wr->bind_info.mw_access_flags) & cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); bseg->flags2 = 0; - if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + if (wr->mw->type == IB_MW_TYPE_2) bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); - if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + if (wr->bind_info.mw_access_flags & IB_ZERO_BASED) bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); - bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); - bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); - bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); - bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); + bseg->new_rkey = cpu_to_be32(wr->rkey); + bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->bind_info.addr); + bseg->length = cpu_to_be64(wr->bind_info.length); } static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) @@ -2453,46 +2561,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, + struct ib_atomic_wr *wr) { - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->compare = cpu_to_be64(wr->compare_add); + } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->compare_add); + aseg->compare = cpu_to_be64(wr->compare_add_mask); } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, - struct ib_send_wr *wr) + struct ib_atomic_wr *wr) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); + aseg->compare = cpu_to_be64(wr->compare_add); + aseg->compare_mask = cpu_to_be64(wr->compare_add_mask); } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); - dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); - dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; - memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->remote_qpn); + dseg->qkey = cpu_to_be32(wr->remote_qkey); + dseg->vlan = to_mah(wr->ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6); } static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, + struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { - union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; + union mlx4_ext_av *av = &to_mah(wr->ah)->av; struct mlx4_av sqp_av = {0}; int port = *((u8 *) &av->ib.port_pd) & 0x3; @@ -2511,18 +2620,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } -static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) +static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); int spc; int i; memcpy(&hdr.av, &ah->av, sizeof hdr.av); - hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); - hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); - hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + hdr.remote_qpn = cpu_to_be32(wr->remote_qpn); + hdr.pkey_index = cpu_to_be16(wr->pkey_index); + hdr.qkey = cpu_to_be32(wr->remote_qkey); memcpy(hdr.mac, ah->av.eth.mac, 6); hdr.vlan = ah->av.eth.vlan; @@ -2594,22 +2703,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { - unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); + unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && - wr->num_sge > qp->sq.max_gs - (halign >> 4))) + wr->wr.num_sge > qp->sq.max_gs - (halign >> 4))) return -EINVAL; - memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); + memcpy(wqe->header, wr->header, wr->hlen); - *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen); + *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen); *lso_seg_len = halign; return 0; } @@ -2708,11 +2817,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + @@ -2721,11 +2830,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - set_masked_atomic_seg(wqe, wr); + set_masked_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + @@ -2736,8 +2845,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; @@ -2750,18 +2859,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_fmr_seg(wqe, wr); - wqe += sizeof (struct mlx4_wqe_fmr_seg); - size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + set_reg_seg(wqe, reg_wr(wr)); + wqe += sizeof(struct mlx4_wqe_fmr_seg); + size += sizeof(struct mlx4_wqe_fmr_seg) / 16; break; case IB_WR_BIND_MW: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_bind_seg(wqe, wr); + set_bind_seg(wqe, bind_mw_wr(wr)); wqe += sizeof(struct mlx4_wqe_bind_seg); size += sizeof(struct mlx4_wqe_bind_seg) / 16; break; @@ -2772,7 +2881,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), + ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2783,19 +2893,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_TUN_GSI: /* this is a UD qp used in MAD responses to slaves. */ - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, ud_wr(wr)); /* set the forced-loopback bit in the data seg av */ *(__be32 *) wqe |= cpu_to_be32(0x80000000); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; case MLX4_IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); + err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen, + &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2807,7 +2918,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), + ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2818,7 +2930,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, add_zero_len_inline(wqe); wqe += 16; size++; - build_tunnel_header(wr, wqe, &seglen); + build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; @@ -2828,18 +2940,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, + ud_wr(wr), qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; - build_tunnel_header(wr, wqe, &seglen); + build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; diff --git a/kernel/drivers/infiniband/hw/mlx4/srq.c b/kernel/drivers/infiniband/hw/mlx4/srq.c index dce5dfe3a..c394376eb 100644 --- a/kernel/drivers/infiniband/hw/mlx4/srq.c +++ b/kernel/drivers/infiniband/hw/mlx4/srq.c @@ -34,6 +34,7 @@ #include <linux/mlx4/qp.h> #include <linux/mlx4/srq.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include "mlx4_ib.h" #include "user.h" @@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL); if (!srq->wrid) { - err = -ENOMEM; - goto err_mtt; + srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64), + GFP_KERNEL, PAGE_KERNEL); + if (!srq->wrid) { + err = -ENOMEM; + goto err_mtt; + } } } @@ -204,7 +209,7 @@ err_wrid: if (pd->uobject) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); else - kfree(srq->wrid); + kvfree(srq->wrid); err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); @@ -281,7 +286,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); ib_umem_release(msrq->umem); } else { - kfree(msrq->wrid); + kvfree(msrq->wrid); mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); diff --git a/kernel/drivers/infiniband/hw/mlx5/Kconfig b/kernel/drivers/infiniband/hw/mlx5/Kconfig index 10df386c6..bce263b92 100644 --- a/kernel/drivers/infiniband/hw/mlx5/Kconfig +++ b/kernel/drivers/infiniband/hw/mlx5/Kconfig @@ -1,8 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" - depends on NETDEVICES && ETHERNET && PCI - select NET_VENDOR_MELLANOX - select MLX5_CORE + depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/kernel/drivers/infiniband/hw/mlx5/cq.c b/kernel/drivers/infiniband/hw/mlx5/cq.c index 2ee6b1051..92ddae101 100644 --- a/kernel/drivers/infiniband/hw/mlx5/cq.c +++ b/kernel/drivers/infiniband/hw/mlx5/cq.c @@ -33,6 +33,7 @@ #include <linux/kref.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_cache.h> #include "mlx5_ib.h" #include "user.h" @@ -108,8 +109,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; - case IB_WR_FAST_REG_MR: - return IB_WC_FAST_REG_MR; + case IB_WR_REG_MR: + return IB_WC_REG_MR; default: pr_warn("unknown completion status\n"); @@ -227,7 +228,14 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->dlid_path_bits = cqe->ml_path; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; wc->wc_flags |= g ? IB_WC_GRH : 0; - wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; + if (unlikely(is_qp1(qp->ibqp.qp_type))) { + u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; + + ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey, + &wc->pkey_index); + } else { + wc->pkey_index = 0; + } } static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) @@ -590,8 +598,7 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, { int err; - err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, - PAGE_SIZE * 2, &buf->buf); + err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf); if (err) return err; @@ -736,25 +743,31 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, &cq->db); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx5_create_cq_mbox_in *cqb = NULL; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); int cqe_size; - int irqn; + unsigned int irqn; int eqn; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 0) return ERR_PTR(-EINVAL); entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.gen.max_cqes) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -921,7 +934,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; u32 fsel; - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -1076,7 +1089,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int uninitialized_var(cqe_size); unsigned long flags; - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } @@ -1085,7 +1098,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -EINVAL; entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.gen.max_cqes + 1) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) diff --git a/kernel/drivers/infiniband/hw/mlx5/mad.c b/kernel/drivers/infiniband/hw/mlx5/mad.c index 9cf9a37bb..b84d13a48 100644 --- a/kernel/drivers/infiniband/hw/mlx5/mad.c +++ b/kernel/drivers/infiniband/hw/mlx5/mad.c @@ -41,8 +41,8 @@ enum { }; int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { u8 op_modifier = 0; @@ -58,11 +58,19 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, } int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { u16 slid; int err; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -129,7 +137,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ? + dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: @@ -137,3 +145,300 @@ out: kfree(out_mad); return err; } + +int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad) +{ + struct ib_smp *in_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + if (!in_mad) + return -ENOMEM; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, + out_mad); + + kfree(in_mad); + return err; +} + +int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, + __be64 *sys_image_guid) +{ + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!out_mad) + return -ENOMEM; + + err = mlx5_query_mad_ifc_smp_attr_node_info(ibdev, out_mad); + if (err) + goto out; + + memcpy(sys_image_guid, out_mad->data + 4, 8); + +out: + kfree(out_mad); + + return err; +} + +int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, + u16 *max_pkeys) +{ + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!out_mad) + return -ENOMEM; + + err = mlx5_query_mad_ifc_smp_attr_node_info(ibdev, out_mad); + if (err) + goto out; + + *max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); + +out: + kfree(out_mad); + + return err; +} + +int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev, + u32 *vendor_id) +{ + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!out_mad) + return -ENOMEM; + + err = mlx5_query_mad_ifc_smp_attr_node_info(ibdev, out_mad); + if (err) + goto out; + + *vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & 0xffff; + +out: + kfree(out_mad); + + return err; +} + +int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(node_desc, out_mad->data, 64); +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(node_guid, out_mad->data + 12, 8); +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, + out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, + out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, + out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int ext_active_speed; + int err = -ENOMEM; + + if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) { + mlx5_ib_warn(dev, "invalid port number %d\n", port); + return -EINVAL; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof(*props)); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) { + mlx5_ib_warn(dev, "err %d\n", err); + goto out; + } + + props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); + props->gid_tbl_len = out_mad->data[50]; + props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); + props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = 16; /* FDR */ + break; + case 2: + props->active_speed = 32; /* EDR */ + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == 4) { + if (mdev->port_caps[port - 1].ext_port_cap & + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = 8; + } + } + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} diff --git a/kernel/drivers/infiniband/hw/mlx5/main.c b/kernel/drivers/infiniband/hw/mlx5/main.c index 57c9809e8..c4e091528 100644 --- a/kernel/drivers/infiniband/hw/mlx5/main.c +++ b/kernel/drivers/infiniband/hw/mlx5/main.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include <asm-generic/kmap_types.h> +#include <linux/highmem.h> #include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> @@ -40,6 +40,7 @@ #include <linux/io-mapping.h> #include <linux/sched.h> #include <rdma/ib_user_verbs.h> +#include <linux/mlx5/vport.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> #include "user.h" @@ -62,32 +63,173 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; +static enum rdma_link_layer +mlx5_ib_port_link_layer(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + switch (MLX5_CAP_GEN(dev->mdev, port_type)) { + case MLX5_CAP_PORT_TYPE_IB: + return IB_LINK_LAYER_INFINIBAND; + case MLX5_CAP_PORT_TYPE_ETH: + return IB_LINK_LAYER_ETHERNET; + default: + return IB_LINK_LAYER_UNSPECIFIED; + } +} + +static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) +{ + return !dev->mdev->issi; +} + +enum { + MLX5_VPORT_ACCESS_METHOD_MAD, + MLX5_VPORT_ACCESS_METHOD_HCA, + MLX5_VPORT_ACCESS_METHOD_NIC, +}; + +static int mlx5_get_vport_access_method(struct ib_device *ibdev) +{ + if (mlx5_use_mad_ifc(to_mdev(ibdev))) + return MLX5_VPORT_ACCESS_METHOD_MAD; + + if (mlx5_ib_port_link_layer(ibdev) == + IB_LINK_LAYER_ETHERNET) + return MLX5_VPORT_ACCESS_METHOD_NIC; + + return MLX5_VPORT_ACCESS_METHOD_HCA; +} + +static int mlx5_query_system_image_guid(struct ib_device *ibdev, + __be64 *sys_image_guid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + u64 tmp; + int err; + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_system_image_guid(ibdev, + sys_image_guid); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + if (!err) + *sys_image_guid = cpu_to_be64(tmp); + return err; + + default: + return -EINVAL; + } +} + +static int mlx5_query_max_pkeys(struct ib_device *ibdev, + u16 *max_pkeys) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + case MLX5_VPORT_ACCESS_METHOD_NIC: + *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, + pkey_table_size)); + return 0; + + default: + return -EINVAL; + } +} + +static int mlx5_query_vendor_id(struct ib_device *ibdev, + u32 *vendor_id) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_core_query_vendor_id(dev->mdev, vendor_id); + + default: + return -EINVAL; + } +} + +static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, + __be64 *node_guid) +{ + u64 tmp; + int err; + + switch (mlx5_get_vport_access_method(&dev->ib_dev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_node_guid(dev, node_guid); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); + if (!err) + *node_guid = cpu_to_be64(tmp); + return err; + + default: + return -EINVAL; + } +} + +struct mlx5_reg_node_desc { + u8 desc[64]; +}; + +static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) +{ + struct mlx5_reg_node_desc in; + + if (mlx5_use_mad_ifc(dev)) + return mlx5_query_mad_ifc_node_desc(dev, node_desc); + + memset(&in, 0, sizeof(in)); + + return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, + sizeof(struct mlx5_reg_node_desc), + MLX5_REG_NODE_DESC, 0, 0); +} + static int mlx5_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; + struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; int max_rq_sg; int max_sq_sg; - u64 flags; + u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); - gen = &dev->mdev->caps.gen; - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; + if (uhw->inlen || uhw->outlen) + return -EINVAL; - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + memset(props, 0, sizeof(*props)); + err = mlx5_query_system_image_guid(ibdev, + &props->sys_image_guid); + if (err) + return err; - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); if (err) - goto out; + return err; - memset(props, 0, sizeof(*props)); + err = mlx5_query_vendor_id(ibdev, &props->vendor_id); + if (err) + return err; props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | (fw_rev_min(dev->mdev) << 16) | @@ -96,18 +238,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = gen->flags; - if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + + if (MLX5_CAP_GEN(mdev, pkv)) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + if (MLX5_CAP_GEN(mdev, qkv)) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_APM) + if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; - if (flags & MLX5_DEV_CAP_FLAG_XRC) + if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; - if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | @@ -116,221 +257,271 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->sig_guard_cap = IB_GUARD_T10DIF_CRC | IB_GUARD_T10DIF_CSUM; } - if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; - props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & - 0xffffff; - props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30)); - props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32)); - memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + props->vendor_part_id = mdev->pdev->device; + props->hw_ver = mdev->pdev->revision; props->max_mr_size = ~0ull; - props->page_size_cap = gen->min_page_sz; - props->max_qp = 1 << gen->log_max_qp; - props->max_qp_wr = gen->max_wqes; - max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + props->page_size_cap = ~(min_page_size - 1); + props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); + props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / + sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - + sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << gen->log_max_cq; - props->max_cqe = gen->max_cqes - 1; - props->max_mr = 1 << gen->log_max_mkey; - props->max_pd = 1 << gen->log_max_pd; - props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp; - props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp; - props->max_srq = 1 << gen->log_max_srq; - props->max_srq_wr = gen->max_srq_wqes - 1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; + props->max_sge_rd = props->max_sge; + props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; + props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); + props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); + props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); + props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); + props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; + props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; - props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << gen->log_max_mcg; - props->max_mcast_qp_attach = gen->max_qp_mcg; + props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); + props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(mdev, pg)) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; #endif -out: - kfree(in_mad); - kfree(out_mad); - - return err; + return 0; } -int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +enum mlx5_ib_width { + MLX5_IB_WIDTH_1X = 1 << 0, + MLX5_IB_WIDTH_2X = 1 << 1, + MLX5_IB_WIDTH_4X = 1 << 2, + MLX5_IB_WIDTH_8X = 1 << 3, + MLX5_IB_WIDTH_12X = 1 << 4 +}; + +static int translate_active_width(struct ib_device *ibdev, u8 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; - int ext_active_speed; - int err = -ENOMEM; - - gen = &dev->mdev->caps.gen; - if (port < 1 || port > gen->num_ports) { - mlx5_ib_warn(dev, "invalid port number %d\n", port); - return -EINVAL; + int err = 0; + + if (active_width & MLX5_IB_WIDTH_1X) { + *ib_width = IB_WIDTH_1X; + } else if (active_width & MLX5_IB_WIDTH_2X) { + mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n", + (int)active_width); + err = -EINVAL; + } else if (active_width & MLX5_IB_WIDTH_4X) { + *ib_width = IB_WIDTH_4X; + } else if (active_width & MLX5_IB_WIDTH_8X) { + *ib_width = IB_WIDTH_8X; + } else if (active_width & MLX5_IB_WIDTH_12X) { + *ib_width = IB_WIDTH_12X; + } else { + mlx5_ib_dbg(dev, "Invalid active_width %d\n", + (int)active_width); + err = -EINVAL; } - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - memset(props, 0, sizeof(*props)); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + return err; +} - err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) { - mlx5_ib_warn(dev, "err %d\n", err); - goto out; +static int mlx5_mtu_to_ib_mtu(int mtu) +{ + switch (mtu) { + case 256: return 1; + case 512: return 2; + case 1024: return 3; + case 2048: return 4; + case 4096: return 5; + default: + pr_warn("invalid mtu\n"); + return -1; } +} +enum ib_max_vl_num { + __IB_MAX_VL_0 = 1, + __IB_MAX_VL_0_1 = 2, + __IB_MAX_VL_0_3 = 3, + __IB_MAX_VL_0_7 = 4, + __IB_MAX_VL_0_14 = 5, +}; - props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16)); - props->lmc = out_mad->data[34] & 0x7; - props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18)); - props->sm_sl = out_mad->data[36] & 0xf; - props->state = out_mad->data[32] & 0xf; - props->phys_state = out_mad->data[33] >> 4; - props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); - props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << gen->log_max_msg; - props->pkey_tbl_len = gen->port[port - 1].pkey_table_len; - props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); - props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); - props->active_width = out_mad->data[31] & 0xf; - props->active_speed = out_mad->data[35] >> 4; - props->max_mtu = out_mad->data[41] & 0xf; - props->active_mtu = out_mad->data[36] >> 4; - props->subnet_timeout = out_mad->data[51] & 0x1f; - props->max_vl_num = out_mad->data[37] >> 4; - props->init_type_reply = out_mad->data[41] >> 4; - - /* Check if extended speeds (EDR/FDR/...) are supported */ - if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { - ext_active_speed = out_mad->data[62] >> 4; - - switch (ext_active_speed) { - case 1: - props->active_speed = 16; /* FDR */ - break; - case 2: - props->active_speed = 32; /* EDR */ - break; - } - } +enum mlx5_vl_hw_cap { + MLX5_VL_HW_0 = 1, + MLX5_VL_HW_0_1 = 2, + MLX5_VL_HW_0_2 = 3, + MLX5_VL_HW_0_3 = 4, + MLX5_VL_HW_0_4 = 5, + MLX5_VL_HW_0_5 = 6, + MLX5_VL_HW_0_6 = 7, + MLX5_VL_HW_0_7 = 8, + MLX5_VL_HW_0_14 = 15 +}; - /* If reported active speed is QDR, check if is FDR-10 */ - if (props->active_speed == 4) { - if (gen->ext_port_cap[port - 1] & - MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { - init_query_mad(in_mad); - in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); - - err = mlx5_MAD_IFC(dev, 1, 1, port, - NULL, NULL, in_mad, out_mad); - if (err) - goto out; - - /* Checking LinkSpeedActive for FDR-10 */ - if (out_mad->data[15] & 0x1) - props->active_speed = 8; - } - } +static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, + u8 *max_vl_num) +{ + switch (vl_hw_cap) { + case MLX5_VL_HW_0: + *max_vl_num = __IB_MAX_VL_0; + break; + case MLX5_VL_HW_0_1: + *max_vl_num = __IB_MAX_VL_0_1; + break; + case MLX5_VL_HW_0_3: + *max_vl_num = __IB_MAX_VL_0_3; + break; + case MLX5_VL_HW_0_7: + *max_vl_num = __IB_MAX_VL_0_7; + break; + case MLX5_VL_HW_0_14: + *max_vl_num = __IB_MAX_VL_0_14; + break; -out: - kfree(in_mad); - kfree(out_mad); + default: + return -EINVAL; + } - return err; + return 0; } -static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int max_mtu; + int oper_mtu; + int err; + u8 ib_link_width_oper; + u8 vl_hw_cap; - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) { + err = -ENOMEM; goto out; + } - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + memset(props, 0, sizeof(*props)); - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); if (err) goto out; - memcpy(gid->raw, out_mad->data + 8, 8); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; - in_mad->attr_mod = cpu_to_be32(index / 8); + props->lid = rep->lid; + props->lmc = rep->lmc; + props->sm_lid = rep->sm_lid; + props->sm_sl = rep->sm_sl; + props->state = rep->vport_state; + props->phys_state = rep->port_physical_state; + props->port_cap_flags = rep->cap_mask1; + props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); + props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); + props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); + props->bad_pkey_cntr = rep->pkey_violation_counter; + props->qkey_viol_cntr = rep->qkey_violation_counter; + props->subnet_timeout = rep->subnet_timeout; + props->init_type_reply = rep->init_type_reply; + + err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + if (err) + goto out; - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = translate_active_width(ibdev, ib_link_width_oper, + &props->active_width); + if (err) + goto out; + err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB, + port); if (err) goto out; - memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + mlx5_query_port_max_mtu(mdev, &max_mtu, port); + props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); + + mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); + + props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); + + err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port); + if (err) + goto out; + + err = translate_max_vl_num(ibdev, vl_hw_cap, + &props->max_vl_num); out: - kfree(in_mad); - kfree(out_mad); + kfree(rep); return err; } -static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_port(ibdev, port, props); - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; + case MLX5_VPORT_ACCESS_METHOD_HCA: + return mlx5_query_hca_port(ibdev, port, props); + + default: + return -EINVAL; + } +} - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; - in_mad->attr_mod = cpu_to_be32(index / 32); +static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_gids(ibdev, port, index, gid); - *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]); + case MLX5_VPORT_ACCESS_METHOD_HCA: + return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid); + + default: + return -EINVAL; + } -out: - kfree(in_mad); - kfree(out_mad); - return err; } -struct mlx5_reg_node_desc { - u8 desc[64]; -}; +static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, + pkey); + default: + return -EINVAL; + } +} static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) @@ -392,7 +583,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; - struct mlx5_general_caps *gen; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; @@ -403,7 +593,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int i; size_t reqlen; - gen = &dev->mdev->caps.gen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); @@ -436,14 +625,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << gen->log_max_qp; - resp.bf_reg_size = gen->bf_reg_size; - resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = gen->max_sq_desc_sz; - resp.max_rq_desc_sz = gen->max_rq_desc_sz; - resp.max_send_wqebb = gen->max_wqes; - resp.max_recv_wr = gen->max_wqes; - resp.max_srq_recv_wr = gen->max_srq_wqes; + resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -493,7 +682,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = gen->num_ports; + resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -605,53 +794,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) -{ - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *seg; - struct mlx5_core_mr mr; - int err; - - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - seg = &in->seg; - seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; - seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - seg->start_addr = 0; - - err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in), - NULL, NULL, NULL); - if (err) { - mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); - goto err_in; - } - - kfree(in); - *key = mr.key; - - return 0; - -err_in: - kfree(in); - - return err; -} - -static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) -{ - struct mlx5_core_mr mr; - int err; - - memset(&mr, 0, sizeof(mr)); - mr.key = key; - err = mlx5_core_destroy_mkey(dev->mdev, &mr); - if (err) - mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); -} - static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -677,13 +819,6 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, kfree(pd); return ERR_PTR(-EFAULT); } - } else { - err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); - if (err) { - mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); - kfree(pd); - return ERR_PTR(err); - } } return &pd->ibpd; @@ -694,9 +829,6 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - if (!pd->uobject) - free_pa_mkey(mdev, mpd->pa_lkey); - mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); kfree(mpd); @@ -731,37 +863,15 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int init_node_data(struct mlx5_ib_dev *dev) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; - - err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); - if (err) - goto out; - - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); - - in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + int err; - err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); if (err) - goto out; + return err; - dev->mdev->rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); - memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + dev->mdev->rev_id = dev->mdev->pdev->revision; -out: - kfree(in_mad); - kfree(out_mad); - return err; + return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); } static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, @@ -895,11 +1005,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - struct mlx5_general_caps *gen; int port; - gen = &dev->mdev->caps.gen; - for (port = 1; port <= gen->num_ports; port++) + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) mlx5_query_ext_port_caps(dev, port); } @@ -907,11 +1015,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; - struct mlx5_general_caps *gen; int err = -ENOMEM; int port; + struct ib_udata uhw = {.inlen = 0, .outlen = 0}; - gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -920,20 +1027,23 @@ static int get_port_caps(struct mlx5_ib_dev *dev) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; } - for (port = 1; port <= gen->num_ports; port++) { + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + mlx5_ib_warn(dev, "query_port %d failed %d\n", + port, err); break; } - gen->port[port - 1].pkey_table_len = dprops->max_pkeys; - gen->port[port - 1].gid_table_len = pprops->gid_tbl_len; + dev->mdev->port_caps[port - 1].pkey_table_len = + dprops->max_pkeys; + dev->mdev->port_caps[port - 1].gid_table_len = + pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -955,7 +1065,6 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_destroy_qp(dev->umrc.qp); ib_destroy_cq(dev->umrc.cq); - ib_dereg_mr(dev->umrc.mr); ib_dealloc_pd(dev->umrc.pd); } @@ -970,7 +1079,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - struct ib_mr *mr; + struct ib_cq_init_attr cq_attr = {}; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -987,15 +1096,9 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(mr)) { - mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n"); - ret = PTR_ERR(mr); - goto error_1; - } - - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128, - 0); + cq_attr.cqe = 128; + cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, + &cq_attr); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); @@ -1050,7 +1153,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev) dev->umrc.qp = qp; dev->umrc.cq = cq; - dev->umrc.mr = mr; dev->umrc.pd = pd; sema_init(&dev->umrc.sem, MAX_UMR_WR); @@ -1072,9 +1174,6 @@ error_3: ib_destroy_cq(cq); error_2: - ib_dereg_mr(mr); - -error_1: ib_dealloc_pd(pd); error_0: @@ -1087,6 +1186,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_cq_init_attr cq_attr = {.cqe = 1}; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); @@ -1100,7 +1200,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL); + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; @@ -1159,8 +1259,29 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); + memset(&attr, 0, sizeof(attr)); + attr.attr.max_sge = 1; + attr.attr.max_wr = 1; + attr.srq_type = IB_SRQT_BASIC; + devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); + if (IS_ERR(devr->s1)) { + ret = PTR_ERR(devr->s1); + goto error5; + } + devr->s1->device = &dev->ib_dev; + devr->s1->pd = devr->p0; + devr->s1->uobject = NULL; + devr->s1->event_handler = NULL; + devr->s1->srq_context = NULL; + devr->s1->srq_type = IB_SRQT_BASIC; + devr->s1->ext.xrc.cq = devr->c0; + atomic_inc(&devr->p0->usecnt); + atomic_set(&devr->s0->usecnt, 0); + return 0; +error5: + mlx5_ib_destroy_srq(devr->s0); error4: mlx5_ib_dealloc_xrcd(devr->x1); error3: @@ -1175,6 +1296,7 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { + mlx5_ib_destroy_srq(devr->s1); mlx5_ib_destroy_srq(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0); mlx5_ib_dealloc_xrcd(devr->x1); @@ -1182,12 +1304,34 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_pd(devr->p0); } +static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mlx5_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; int err; int i; + /* don't create IB instance over Eth ports, no RoCE yet! */ + if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + return NULL; + printk_once(KERN_INFO "%s", mlx5_version); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); @@ -1200,15 +1344,16 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_dealloc; - get_ext_port_caps(dev); + if (mlx5_use_mad_ifc(dev)) + get_ext_port_caps(dev); MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; - dev->num_ports = mdev->caps.gen.num_ports; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = dev->mdev->priv.eq_table.num_comp_vectors; @@ -1276,19 +1421,17 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; - dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; - dev->ib_dev.create_mr = mlx5_ib_create_mr; - dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; - dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; - dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; + dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; + dev->ib_dev.get_port_immutable = mlx5_port_immutable; - mlx5_ib_internal_query_odp_caps(dev); + mlx5_ib_internal_fill_odp_caps(dev); - if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) { + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= diff --git a/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.h b/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.h index dff1cfcdf..633347260 100644 --- a/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -103,7 +103,6 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; - u32 pa_lkey; }; /* Use macros here so that don't have to duplicate @@ -213,7 +212,6 @@ struct mlx5_ib_qp { int uuarn; int create_type; - u32 pa_lkey; /* Store signature errors */ bool signature_en; @@ -247,6 +245,7 @@ enum mlx5_ib_qp_flags { }; struct mlx5_umr_wr { + struct ib_send_wr wr; union { u64 virt_addr; u64 offset; @@ -259,6 +258,11 @@ struct mlx5_umr_wr { u32 mkey; }; +static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct mlx5_umr_wr, wr); +} + struct mlx5_shared_mr_info { int mr_id; struct ib_umem *umem; @@ -315,6 +319,11 @@ enum mlx5_ib_mtt_access_flags { struct mlx5_ib_mr { struct ib_mr ibmr; + void *descs; + dma_addr_t desc_map; + int ndescs; + int max_descs; + int desc_size; struct mlx5_core_mr mmr; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; @@ -326,12 +335,7 @@ struct mlx5_ib_mr { struct mlx5_create_mkey_mbox_out out; struct mlx5_core_sig_ctx *sig; int live; -}; - -struct mlx5_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - __be64 *mapped_page_list; - dma_addr_t map; + void *descs_alloc; }; struct mlx5_ib_umr_context { @@ -349,7 +353,6 @@ struct umr_common { struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - struct ib_mr *mr; /* control access to UMR QP */ struct semaphore sem; @@ -361,20 +364,6 @@ enum { MLX5_FMR_BUSY, }; -struct mlx5_ib_fmr { - struct ib_fmr ibfmr; - struct mlx5_core_mr mr; - int access_flags; - int state; - /* protect fmr state - */ - spinlock_t lock; - u64 wrid; - struct ib_send_wr wr[2]; - u8 page_shift; - struct ib_fast_reg_page_list page_list; -}; - struct mlx5_cache_ent { struct list_head head; /* sync access to the cahce entry @@ -415,6 +404,7 @@ struct mlx5_ib_resources { struct ib_xrcd *x1; struct ib_pd *p0; struct ib_srq *s0; + struct ib_srq *s1; }; struct mlx5_ib_dev { @@ -458,11 +448,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } -static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr); -} - static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); @@ -503,11 +488,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } -static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) -{ - return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl); -} - struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -525,8 +505,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, struct mlx5_ib_ah *ah); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); @@ -556,8 +536,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length); -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_destroy_cq(struct ib_cq *cq); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); @@ -571,29 +552,39 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); -int mlx5_ib_destroy_mr(struct ib_mr *ibmr); -struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr); -struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, - int max_page_list_len); -struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len); -void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); -struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc, - struct ib_fmr_attr *fmr_attr); -int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int npages, u64 iova); -int mlx5_ib_unmap_fmr(struct list_head *fmr_list); -int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); +int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad); +int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, + __be64 *sys_image_guid); +int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, + u16 *max_pkeys); +int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev, + u32 *vendor_id); +int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc); +int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid); +int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey); +int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); +int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); @@ -617,7 +608,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; -int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev); +void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault); void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); @@ -631,9 +622,9 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { - return 0; + return; } static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} @@ -663,6 +654,11 @@ static inline u8 convert_access(int acc) MLX5_PERM_LOCAL_READ; } +static inline int is_qp1(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_GSI; +} + #define MLX5_MAX_UMR_SHIFT 16 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) diff --git a/kernel/drivers/infiniband/hw/mlx5/mr.c b/kernel/drivers/infiniband/hw/mlx5/mr.c index 0c52f0787..6000f7aee 100644 --- a/kernel/drivers/infiniband/hw/mlx5/mr.c +++ b/kernel/drivers/infiniband/hw/mlx5/mr.c @@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) } } } else if (ent->cur > 2 * ent->limit) { - if (!someone_adding(cache) && + /* + * The remove_keys() logic is performed as garbage collection + * task. Such task is intended to be run when no other active + * processes are running. + * + * The need_resched() will return TRUE if there are user tasks + * to be activated in near future. + * + * In such case, we don't execute remove_keys() and postpone + * the garbage collection work to try to run in next cycle, + * in order to free CPU resources to other tasks. + */ + if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { remove_keys(dev, i, 1); if (ent->cur > ent->limit) @@ -441,9 +453,6 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) spin_unlock_irq(&ent->lock); queue_work(cache->wq, &ent->work); - - if (mr) - break; } if (!mr) @@ -690,12 +699,11 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_mr *mr = dev->umrc.mr; - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); sg->addr = dma; sg->length = ALIGN(sizeof(u64) * n, 64); - sg->lkey = mr->lkey; + sg->lkey = dev->umrc.pd->local_dma_lkey; wr->next = NULL; wr->send_flags = 0; @@ -719,7 +727,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, struct ib_send_wr *wr, u32 key) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; wr->opcode = MLX5_IB_WR_UMR; @@ -756,7 +764,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct ib_send_wr wr, *bad; + struct mlx5_umr_wr umrwr; + struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size; @@ -802,14 +811,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, goto free_pas; } - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift, - virt_addr, len, access_flags); + memset(&umrwr, 0, sizeof(umrwr)); + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + page_shift, virt_addr, len, access_flags); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); goto unmap_dma; @@ -855,8 +864,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int size; __be64 *pas; dma_addr_t dma; - struct ib_send_wr wr, *bad; - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg; + struct ib_send_wr *bad; + struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); @@ -921,26 +930,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; + wr.wr.wr_id = (u64)(unsigned long)&umr_context; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), MLX5_UMR_MTT_ALIGNMENT); - sg.lkey = dev->umrc.mr->lkey; + sg.lkey = dev->umrc.pd->local_dma_lkey; - wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | MLX5_IB_SEND_UMR_UPDATE_MTT; - wr.sg_list = &sg; - wr.num_sge = 1; - wr.opcode = MLX5_IB_WR_UMR; - umrwr->npages = sg.length / sizeof(u64); - umrwr->page_shift = PAGE_SHIFT; - umrwr->mkey = mr->mmr.key; - umrwr->target.offset = start_page_index; + wr.wr.sg_list = &sg; + wr.wr.num_sge = 1; + wr.wr.opcode = MLX5_IB_WR_UMR; + wr.npages = sg.length / sizeof(u64); + wr.page_shift = PAGE_SHIFT; + wr.mkey = mr->mmr.key; + wr.target.offset = start_page_index; mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); } else { @@ -975,8 +984,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, struct mlx5_ib_mr *mr; int inlen; int err; - bool pg_cap = !!(dev->mdev->caps.gen.flags & - MLX5_DEV_CAP_FLAG_ON_DMND_PG); + bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -1127,16 +1135,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct ib_send_wr wr, *bad; + struct mlx5_umr_wr umrwr; + struct ib_send_wr *bad; int err; - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + memset(&umrwr.wr, 0, sizeof(umrwr)); + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); @@ -1156,12 +1165,73 @@ error: return err; } +static int +mlx5_alloc_priv_descs(struct ib_device *device, + struct mlx5_ib_mr *mr, + int ndescs, + int desc_size) +{ + int size = ndescs * desc_size; + int add_size; + int ret; + + add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + + mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); + if (!mr->descs_alloc) + return -ENOMEM; + + mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); + + mr->desc_map = dma_map_single(device->dma_device, mr->descs, + size, DMA_TO_DEVICE); + if (dma_mapping_error(device->dma_device, mr->desc_map)) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + kfree(mr->descs_alloc); + + return ret; +} + +static void +mlx5_free_priv_descs(struct mlx5_ib_mr *mr) +{ + if (mr->descs) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_descs * mr->desc_size; + + dma_unmap_single(device->dma_device, mr->desc_map, + size, DMA_TO_DEVICE); + kfree(mr->descs_alloc); + mr->descs = NULL; + } +} + static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); int umred = mr->umred; int err; + if (mr->sig) { + if (mlx5_core_destroy_psv(dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + mr->sig = NULL; + } + + mlx5_free_priv_descs(mr); + if (!umred) { err = destroy_mkey(dev, mr); if (err) { @@ -1223,14 +1293,15 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) return 0; } -struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr) +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; int access_mode, err; - int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + int ndescs = roundup(max_num_sg, 4); mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -1246,9 +1317,19 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, in->seg.xlt_oct_size = cpu_to_be32(ndescs); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - access_mode = MLX5_ACCESS_MODE_MTT; - if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + if (mr_type == IB_MR_TYPE_MEM_REG) { + access_mode = MLX5_ACCESS_MODE_MTT; + in->seg.log2_page_size = PAGE_SHIFT; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(u64)); + if (err) + goto err_free_in; + + mr->desc_size = sizeof(u64); + mr->max_descs = ndescs; + } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | @@ -1274,6 +1355,10 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, mr->sig->sig_err_exists = false; /* Next UMR, Arm SIGERR */ ++mr->sig->sigerr_count; + } else { + mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); + err = -EINVAL; + goto err_free_in; } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; @@ -1300,6 +1385,7 @@ err_destroy_psv: mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); } + mlx5_free_priv_descs(mr); err_free_sig: kfree(mr->sig); err_free_in: @@ -1309,122 +1395,6 @@ err_free: return ERR_PTR(err); } -int mlx5_ib_destroy_mr(struct ib_mr *ibmr) -{ - struct mlx5_ib_dev *dev = to_mdev(ibmr->device); - struct mlx5_ib_mr *mr = to_mmr(ibmr); - int err; - - if (mr->sig) { - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_memory.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", - mr->sig->psv_memory.psv_idx); - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_wire.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", - mr->sig->psv_wire.psv_idx); - kfree(mr->sig); - } - - err = destroy_mkey(dev, mr); - if (err) { - mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmr.key, err); - return err; - } - - kfree(mr); - - return err; -} - -struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, - int max_page_list_len) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_ib_mr *mr; - int err; - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); - - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto err_free; - } - - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - /* - * TBD not needed - issue 197292 */ - in->seg.log2_page_size = PAGE_SHIFT; - - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL, - NULL, NULL); - kfree(in); - if (err) - goto err_free; - - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; - mr->umem = NULL; - - return &mr->ibmr; - -err_free: - kfree(mr); - return ERR_PTR(err); -} - -struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl; - int size = page_list_len * sizeof(u64); - - mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); - if (!mfrpl) - return ERR_PTR(-ENOMEM); - - mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); - if (!mfrpl->ibfrpl.page_list) - goto err_free; - - mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, - size, &mfrpl->map, - GFP_KERNEL); - if (!mfrpl->mapped_page_list) - goto err_free; - - WARN_ON(mfrpl->map & 0x3f); - - return &mfrpl->ibfrpl; - -err_free: - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); - return ERR_PTR(-ENOMEM); -} - -void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); - struct mlx5_ib_dev *dev = to_mdev(page_list->device); - int size = page_list->max_page_list_len * sizeof(u64); - - dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list, - mfrpl->map); - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); -} - int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { @@ -1465,3 +1435,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, done: return ret; } + +static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + __be64 *descs; + + if (unlikely(mr->ndescs == mr->max_descs)) + return -ENOMEM; + + descs = mr->descs; + descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + + return 0; +} + +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int n; + + mr->ndescs = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, + mr->desc_size * mr->max_descs, + DMA_TO_DEVICE); + + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + + ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, + mr->desc_size * mr->max_descs, + DMA_TO_DEVICE); + + return n; +} diff --git a/kernel/drivers/infiniband/hw/mlx5/odp.c b/kernel/drivers/infiniband/hw/mlx5/odp.c index 5099db08a..aa8391e75 100644 --- a/kernel/drivers/infiniband/hw/mlx5/odp.c +++ b/kernel/drivers/infiniband/hw/mlx5/odp.c @@ -109,40 +109,33 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, ib_umem_odp_unmap_dma_pages(umem, start, end); } -#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ - if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \ - ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \ -} while (0) - -int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { - int err; - struct mlx5_odp_caps hw_caps; struct ib_odp_caps *caps = &dev->odp_caps; memset(caps, 0, sizeof(*caps)); - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) - return 0; - - err = mlx5_query_odp_caps(dev->mdev, &hw_caps); - if (err) - goto out; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; caps->general_caps = IB_ODP_SUPPORT; - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps, - SEND); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - SEND); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - RECV); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - WRITE); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - READ); - -out: - return err; + + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ; + + return; } static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, diff --git a/kernel/drivers/infiniband/hw/mlx5/qp.c b/kernel/drivers/infiniband/hw/mlx5/qp.c index d35f62d4f..307bdbca8 100644 --- a/kernel/drivers/infiniband/hw/mlx5/qp.c +++ b/kernel/drivers/infiniband/hw/mlx5/qp.c @@ -64,7 +64,7 @@ static const u32 mlx5_ib_opcode[] = { [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, - [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_REG_MR] = MLX5_OPCODE_UMR, [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, @@ -76,11 +76,6 @@ static int is_qp0(enum ib_qp_type qp_type) return qp_type == IB_QPT_SMI; } -static int is_qp1(enum ib_qp_type qp_type) -{ - return qp_type == IB_QPT_GSI; -} - static int is_sqp(enum ib_qp_type qp_type) { return is_qp0(qp_type) || is_qp1(qp_type); @@ -220,13 +215,11 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { - struct mlx5_general_caps *gen; int wqe_size; int wq_size; - gen = &dev->mdev->caps.gen; /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > gen->max_wqes) + if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) return -EINVAL; if (!has_rq) { @@ -246,10 +239,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; - if (wqe_size > gen->max_rq_desc_sz) { + if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, - gen->max_rq_desc_sz); + MLX5_CAP_GEN(dev->mdev, + max_wqe_sz_rq)); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); @@ -330,11 +324,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { - struct mlx5_general_caps *gen; int wqe_size; int wq_size; - gen = &dev->mdev->caps.gen; if (!attr->cap.max_send_wr) return 0; @@ -343,9 +335,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (wqe_size < 0) return wqe_size; - if (wqe_size > gen->max_sq_desc_sz) { + if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", - wqe_size, gen->max_sq_desc_sz); + wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } @@ -358,9 +350,10 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; - if (qp->sq.wqe_cnt > gen->max_wqes) { + if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", - qp->sq.wqe_cnt, gen->max_wqes); + qp->sq.wqe_cnt, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); @@ -375,13 +368,11 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { - struct mlx5_general_caps *gen; int desc_sz = 1 << qp->sq.wqe_shift; - gen = &dev->mdev->caps.gen; - if (desc_sz > gen->max_sq_desc_sz) { + if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", - desc_sz, gen->max_sq_desc_sz); + desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } @@ -393,9 +384,10 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, qp->sq.wqe_cnt = ucmd->sq_wqe_count; - if (qp->sq.wqe_cnt > gen->max_wqes) { + if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", - qp->sq.wqe_cnt, gen->max_wqes); + qp->sq.wqe_cnt, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -EINVAL; } @@ -768,7 +760,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_uuar; @@ -866,22 +858,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; - struct mlx5_general_caps *gen; struct mlx5_ib_create_qp ucmd; int inlen = sizeof(*in); int err; mlx5_ib_odp_create_qp(qp); - gen = &dev->mdev->caps.gen; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { @@ -914,15 +905,17 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (pd) { if (pd->uobject) { + __u32 max_wqes = + 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || ucmd.rq_wqe_count != qp->rq.wqe_cnt) { mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > gen->max_wqes) { + if (ucmd.sq_wqe_count > max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, gen->max_wqes); + ucmd.sq_wqe_count, max_wqes); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); @@ -932,8 +925,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); if (err) mlx5_ib_dbg(dev, "err %d\n", err); - else - qp->pa_lkey = to_mpd(pd)->pa_lkey; } if (err) @@ -1014,7 +1005,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); } else { in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + in->ctx.rq_type_srqn |= + cpu_to_be32(to_msrq(devr->s1)->msrq.srqn); } } @@ -1226,7 +1218,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx5_general_caps *gen; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; @@ -1244,12 +1235,11 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } - gen = &dev->mdev->caps.gen; switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: - if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) { + if (!MLX5_CAP_GEN(dev->mdev, xrc)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } @@ -1356,9 +1346,6 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { - struct mlx5_general_caps *gen; - - gen = &dev->mdev->caps.gen; if (rate == IB_RATE_PORT_CURRENT) { return 0; } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { @@ -1366,7 +1353,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - gen->stat_rate_support)) + MLX5_CAP_GEN(dev->mdev, stat_rate_support))) --rate; } @@ -1377,10 +1364,8 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { - struct mlx5_general_caps *gen; int err; - gen = &dev->mdev->caps.gen; path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; @@ -1391,9 +1376,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->rlid = cpu_to_be16(ah->dlid); if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) { + if (ah->grh.sgid_index >= + dev->mdev->port_caps[port - 1].gid_table_len) { pr_err("sgid_index (%u) too large. max is %d\n", - ah->grh.sgid_index, gen->port[port - 1].gid_table_len); + ah->grh.sgid_index, + dev->mdev->port_caps[port - 1].gid_table_len); return -EINVAL; } path->grh_mlid |= 1 << 7; @@ -1570,7 +1557,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; - struct mlx5_general_caps *gen; struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; @@ -1579,7 +1565,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int mlx5_st; int err; - gen = &dev->mdev->caps.gen; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return -ENOMEM; @@ -1619,7 +1604,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg; + context->mtu_msgmax = (attr->path_mtu << 5) | + (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg); } if (attr_mask & IB_QP_DEST_QPN) @@ -1777,11 +1763,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; - struct mlx5_general_caps *gen; int err = -EINVAL; int port; - gen = &dev->mdev->caps.gen; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -1793,21 +1777,25 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > gen->num_ports)) + (attr->port_num == 0 || + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= gen->port[port - 1].pkey_table_len) + if (attr->pkey_index >= + dev->mdev->port_caps[port - 1].pkey_table_len) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp)) + attr->max_rd_atomic > + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) goto out; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp)) + attr->max_dest_rd_atomic > + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { @@ -1850,9 +1838,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { - memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); - dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); - dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); } static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) @@ -1908,22 +1896,24 @@ static __be64 sig_mkey_mask(void) return cpu_to_be64(result); } -static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int li) +static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, + struct mlx5_ib_mr *mr) { - memset(umr, 0, sizeof(*umr)); - - if (li) { - umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = 1 << 7; - return; - } + int ndescs = mr->ndescs; - umr->flags = (1 << 5); /* fail if not free */ - umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); + memset(umr, 0, sizeof(*umr)); + umr->flags = MLX5_UMR_CHECK_NOT_FREE; + umr->klm_octowords = get_klm_octo(ndescs); umr->mkey_mask = frwr_mkey_mask(); } +static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) +{ + memset(umr, 0, sizeof(*umr)); + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = 1 << 7; +} + static __be64 get_umr_reg_mr_mask(void) { u64 result; @@ -1964,7 +1954,7 @@ static __be64 get_umr_update_mtt_mask(void) static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); @@ -1999,29 +1989,31 @@ static u8 get_umr_flags(int acc) MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } -static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, - int li, int *writ) +static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, + struct mlx5_ib_mr *mr, + u32 key, int access) { - memset(seg, 0, sizeof(*seg)); - if (li) { - seg->status = MLX5_MKEY_STATUS_FREE; - return; - } + int ndescs = ALIGN(mr->ndescs, 8) >> 1; - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | - MLX5_ACCESS_MODE_MTT; - *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); - seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); + memset(seg, 0, sizeof(*seg)); + seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; + seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - seg->len = cpu_to_be64(wr->wr.fast_reg.length); - seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); - seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->start_addr = cpu_to_be64(mr->ibmr.iova); + seg->len = cpu_to_be64(mr->ibmr.length); + seg->xlt_oct_size = cpu_to_be32(ndescs); + seg->log2_page_size = ilog2(mr->ibmr.page_size); +} + +static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) +{ + memset(seg, 0, sizeof(*seg)); + seg->status = MLX5_MKEY_STATUS_FREE; } static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { @@ -2040,22 +2032,15 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w mlx5_mkey_variant(umrwr->mkey)); } -static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, - struct ib_send_wr *wr, - struct mlx5_core_dev *mdev, - struct mlx5_ib_pd *pd, - int writ) +static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, + struct mlx5_ib_mr *mr, + struct mlx5_ib_pd *pd) { - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); - u64 *page_list = wr->wr.fast_reg.page_list->page_list; - u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0); - int i; + int bcount = mr->desc_size * mr->ndescs; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) - mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); - dseg->addr = cpu_to_be64(mfrpl->map); - dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); - dseg->lkey = cpu_to_be32(pd->pa_lkey); + dseg->addr = cpu_to_be64(mr->desc_map); + dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); + dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } static __be32 send_ieth(struct ib_send_wr *wr) @@ -2236,22 +2221,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, - void **seg, int *size) +static int set_sig_data_segment(struct ib_sig_handover_wr *wr, + struct mlx5_ib_qp *qp, void **seg, int *size) { - struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; - struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct ib_sig_attrs *sig_attrs = wr->sig_attrs; + struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->sg_list->length; - u32 data_key = wr->sg_list->lkey; - u64 data_va = wr->sg_list->addr; + u32 data_len = wr->wr.sg_list->length; + u32 data_key = wr->wr.sg_list->lkey; + u64 data_va = wr->wr.sg_list->addr; int ret; int wqe_size; - if (!wr->wr.sig_handover.prot || - (data_key == wr->wr.sig_handover.prot->lkey && - data_va == wr->wr.sig_handover.prot->addr && - data_len == wr->wr.sig_handover.prot->length)) { + if (!wr->prot || + (data_key == wr->prot->lkey && + data_va == wr->prot->addr && + data_len == wr->prot->length)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. @@ -2285,8 +2270,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->wr.sig_handover.prot->lkey; - u64 prot_va = wr->wr.sig_handover.prot->addr; + u32 prot_key = wr->prot->lkey; + u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -2338,16 +2323,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_send_wr *wr, u32 nelements, + struct ib_sig_handover_wr *wr, u32 nelements, u32 length, u32 pdn) { - struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + seg->flags = get_umr_flags(wr->access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | @@ -2358,7 +2343,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, } static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, u32 nelements) + u32 nelements) { memset(umr, 0, sizeof(*umr)); @@ -2369,37 +2354,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } -static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, +static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, void **seg, int *size) { - struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 klm_oct_size; int region_len, ret; - if (unlikely(wr->num_sge != 1) || - unlikely(wr->wr.sig_handover.access_flags & - IB_ACCESS_REMOTE_ATOMIC) || + if (unlikely(wr->wr.num_sge != 1) || + unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ - region_len = wr->sg_list->length; - if (wr->wr.sig_handover.prot && - (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || - wr->wr.sig_handover.prot->addr != wr->sg_list->addr || - wr->wr.sig_handover.prot->length != wr->sg_list->length)) - region_len += wr->wr.sig_handover.prot->length; + region_len = wr->wr.sg_list->length; + if (wr->prot && + (wr->prot->lkey != wr->wr.sg_list->lkey || + wr->prot->addr != wr->wr.sg_list->addr || + wr->prot->length != wr->wr.sg_list->length)) + region_len += wr->prot->length; /** * KLM octoword size - if protection was provided * then we use strided block format (3 octowords), * else we use single KLM (1 octoword) **/ - klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + klm_oct_size = wr->prot ? 3 : 1; - set_sig_umr_segment(*seg, wr, klm_oct_size); + set_sig_umr_segment(*seg, klm_oct_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) @@ -2445,38 +2430,52 @@ static int set_psv_wr(struct ib_sig_domain *domain, return 0; } -static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, - struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) +static int set_reg_wr(struct mlx5_ib_qp *qp, + struct ib_reg_wr *wr, + void **seg, int *size) { - int writ = 0; - int li; + struct mlx5_ib_mr *mr = to_mmr(wr->mr); + struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0; - if (unlikely(wr->send_flags & IB_SEND_INLINE)) + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Invalid IB_SEND_INLINE send flag\n"); return -EINVAL; + } - set_frwr_umr_segment(*seg, wr, li); + set_reg_umr_seg(*seg, mr); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - set_mkey_segment(*seg, wr, li, &writ); + + set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - if (!li) { - if (unlikely(wr->wr.fast_reg.page_list_len > - wr->wr.fast_reg.page_list->max_page_list_len)) - return -ENOMEM; - set_frwr_pages(*seg, wr, mdev, pd, writ); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - } + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + return 0; } +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) +{ + set_linv_umr_seg(*seg); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + set_linv_mkey_seg(*seg); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); +} + static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) { __be32 *p = NULL; @@ -2590,7 +2589,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -2639,7 +2637,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; - xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num); seg += sizeof(*xrc); size += sizeof(*xrc) / 16; /* fall through */ @@ -2648,8 +2645,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(seg, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2666,22 +2663,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } + set_linv_wr(qp, &seg, &size); num_sge = 0; break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; - ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); - err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + qp->sq.wr_data[idx] = IB_WR_REG_MR; + ctrl->imm = cpu_to_be32(reg_wr(wr)->key); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size); if (err) { - mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } @@ -2690,7 +2681,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; - mr = to_mmr(wr->wr.sig_handover.sig_mr); + mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); err = set_sig_umr_wr(wr, qp, &seg, &size); @@ -2718,7 +2709,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, mr->sig->psv_memory.psv_idx, &seg, &size); if (err) { @@ -2740,7 +2731,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); if (err) { @@ -2764,8 +2755,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(seg, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2792,7 +2783,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); set_reg_umr_segment(seg, wr); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; @@ -3009,7 +3000,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at ib_ah_attr->port_num = path->port; if (ib_ah_attr->port_num == 0 || - ib_ah_attr->port_num > dev->caps.gen.num_ports) + ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) return; ib_ah_attr->sl = path->sl & 0xf; @@ -3135,12 +3126,10 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_general_caps *gen; struct mlx5_ib_xrcd *xrcd; int err; - gen = &dev->mdev->caps.gen; - if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) + if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); diff --git a/kernel/drivers/infiniband/hw/mlx5/srq.c b/kernel/drivers/infiniband/hw/mlx5/srq.c index 02d77a297..e008505e9 100644 --- a/kernel/drivers/infiniband/hw/mlx5/srq.c +++ b/kernel/drivers/infiniband/hw/mlx5/srq.c @@ -165,7 +165,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; @@ -236,7 +236,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_general_caps *gen; struct mlx5_ib_srq *srq; int desc_size; int buf_size; @@ -245,13 +244,13 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int uninitialized_var(inlen); int is_xrc; u32 flgs, xrcdn; + __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - gen = &dev->mdev->caps.gen; /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= gen->max_srq_wqes) { + if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, - gen->max_srq_wqes); + max_srq_wqes); return ERR_PTR(-EINVAL); } @@ -303,7 +302,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen); + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc); kvfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_av.c b/kernel/drivers/infiniband/hw/mthca/mthca_av.c index 32f6c6315..bcac29404 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_av.c +++ b/kernel/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, ib_get_cached_gid(&dev->ib_dev, be32_to_cpu(ah->av->port_pd) >> 24, ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid); + &header->grh.source_gid, NULL); memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_cmd.c b/kernel/drivers/infiniband/hw/mthca/mthca_cmd.c index 9d3e5c1ac..c7f49bbb0 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/kernel/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1858,8 +1858,8 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn) } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { struct mthca_mailbox *inmailbox, *outmailbox; void *inbox; diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_cmd.h b/kernel/drivers/infiniband/hw/mthca/mthca_cmd.h index f952244c5..d2e5b194b 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/kernel/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -312,8 +312,8 @@ int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, struct mthca_mailbox *mailbox); int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); int mthca_READ_MGM(struct mthca_dev *dev, int index, struct mthca_mailbox *mailbox); int mthca_WRITE_MGM(struct mthca_dev *dev, int index, diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_dev.h b/kernel/drivers/infiniband/hw/mthca/mthca_dev.h index 7e6a6d64a..4393a0228 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/kernel/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,10 +576,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_mad.c b/kernel/drivers/infiniband/hw/mthca/mthca_mad.c index 8881fa376..7c3f2fb44 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/kernel/drivers/infiniband/hw/mthca/mthca_mad.c @@ -104,7 +104,7 @@ static void update_sm_ah(struct mthca_dev *dev, */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, - struct ib_mad *mad, + const struct ib_mad *mad, u16 prev_lid) { struct ib_event event; @@ -160,7 +160,7 @@ static void node_desc_override(struct ib_device *dev, static void forward_trap(struct mthca_dev *dev, u8 port_num, - struct ib_mad *mad) + const struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; @@ -170,7 +170,8 @@ static void forward_trap(struct mthca_dev *dev, if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; /* @@ -195,15 +196,22 @@ static void forward_trap(struct mthca_dev *dev, int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_profile.c b/kernel/drivers/infiniband/hw/mthca/mthca_profile.c index 8edb28a9a..15d064479 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/kernel/drivers/infiniband/hw/mthca/mthca_profile.c @@ -77,7 +77,6 @@ s64 mthca_make_profile(struct mthca_dev *dev, u64 mem_base, mem_avail; s64 total_size = 0; struct mthca_resource *profile; - struct mthca_resource tmp; int i, j; profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); @@ -136,11 +135,8 @@ s64 mthca_make_profile(struct mthca_dev *dev, */ for (i = MTHCA_RES_NUM; i > 0; --i) for (j = 1; j < i; ++j) { - if (profile[j].size > profile[j - 1].size) { - tmp = profile[j]; - profile[j] = profile[j - 1]; - profile[j - 1] = tmp; - } + if (profile[j].size > profile[j - 1].size) + swap(profile[j], profile[j - 1]); } for (i = 0; i < MTHCA_RES_NUM; ++i) { diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_provider.c b/kernel/drivers/infiniband/hw/mthca/mthca_provider.c index 415f8e1a5..dc2d48c59 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/kernel/drivers/infiniband/hw/mthca/mthca_provider.c @@ -57,14 +57,17 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static int mthca_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -94,6 +97,7 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; + props->max_sge_rd = props->max_sge; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; @@ -641,16 +645,20 @@ static int mthca_destroy_qp(struct ib_qp *qp) return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); @@ -1244,6 +1252,24 @@ out: return err; } +static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mthca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1323,6 +1349,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; + dev->ib_dev.get_port_immutable = mthca_port_immutable; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; diff --git a/kernel/drivers/infiniband/hw/mthca/mthca_qp.c b/kernel/drivers/infiniband/hw/mthca/mthca_qp.c index e354b2f04..35fe506e2 100644 --- a/kernel/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/kernel/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev, /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_send_wr *wr, + int ind, struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { @@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, u16 pkey; ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, - mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, + mthca_ah_grh_present(to_mah(wr->ah)), 0, &sqp->ud_header); - err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); + err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header); if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); @@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; - switch (wr->opcode) { + switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->ex.imm_data; + sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; @@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - wr->wr.ud.pkey_index, &pkey); + wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? + sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, @@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, } static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, - struct ib_send_wr *wr) + struct ib_atomic_wr *wr) { - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->compare = cpu_to_be64(wr->compare_add); } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); - useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); + useg->dqpn = cpu_to_be32(wr->remote_qpn); + useg->qkey = cpu_to_be32(wr->remote_qkey); } static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->remote_qpn); + useg->qkey = cpu_to_be32(wr->remote_qkey); } int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, @@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - set_tavor_ud_seg(wqe, wr); + set_tavor_ud_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, wr, + err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), wqe - sizeof (struct mthca_next_seg), wqe); if (err) { @@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - set_arbel_ud_seg(wqe, wr); + set_arbel_ud_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, wr, + err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), wqe - sizeof (struct mthca_next_seg), wqe); if (err) { diff --git a/kernel/drivers/infiniband/hw/nes/nes_cm.c b/kernel/drivers/infiniband/hw/nes/nes_cm.c index 72b43417c..8a3ad170d 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_cm.c +++ b/kernel/drivers/infiniband/hw/nes/nes_cm.c @@ -1520,8 +1520,9 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi int rc = arpindex; struct net_device *netdev; struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + __be32 dst_ipaddr = htonl(dst_ip); - rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); + rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0); if (IS_ERR(rt)) { printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); @@ -1533,7 +1534,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); + neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); if (neigh) { @@ -1616,6 +1617,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, &cm_node->loc_addr, cm_node->loc_port, &cm_node->rem_addr, cm_node->rem_port); cm_node->listener = listener; + if (listener) + cm_node->tos = listener->tos; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); @@ -2938,6 +2941,9 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); + nesqp->nesqp_context->misc2 |= cpu_to_le32( + cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT); + nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( @@ -3612,6 +3618,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->ord_size = 1; cm_node->apbvt_set = apbvt_set; + cm_node->tos = cm_id->tos; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; nes_add_ref(&nesqp->ibqp); @@ -3666,6 +3673,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) } cm_id->provider_data = cm_node; + cm_node->tos = cm_id->tos; if (!cm_node->reused_node) { if (nes_create_mapinfo(&cm_info)) diff --git a/kernel/drivers/infiniband/hw/nes/nes_cm.h b/kernel/drivers/infiniband/hw/nes/nes_cm.h index f522cf639..32a6420c2 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_cm.h +++ b/kernel/drivers/infiniband/hw/nes/nes_cm.h @@ -303,6 +303,7 @@ struct nes_cm_listener { int backlog; enum nes_cm_listener_state listener_state; u32 reused_node; + u8 tos; }; /* per connection node and node state information */ @@ -352,6 +353,7 @@ struct nes_cm_node { struct list_head reset_entry; struct nes_qp *nesqp; atomic_t passive_state; + u8 tos; }; /* structure for client or CM to fill when making CM api calls. */ diff --git a/kernel/drivers/infiniband/hw/nes/nes_hw.c b/kernel/drivers/infiniband/hw/nes/nes_hw.c index 02120d340..4713dd7ed 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_hw.c +++ b/kernel/drivers/infiniband/hw/nes/nes_hw.c @@ -3861,7 +3861,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( - (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); + (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]); } else { cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; diff --git a/kernel/drivers/infiniband/hw/nes/nes_hw.h b/kernel/drivers/infiniband/hw/nes/nes_hw.h index d748e4b31..c9080208a 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_hw.h +++ b/kernel/drivers/infiniband/hw/nes/nes_hw.h @@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl { dma_addr_t paddr; }; -struct nes_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - struct nes_fast_mr_wqe_pbl nes_wqe_pbl; - u64 pbl; -}; - struct nes_listener { struct work_struct work; struct workqueue_struct *wq; diff --git a/kernel/drivers/infiniband/hw/nes/nes_nic.c b/kernel/drivers/infiniband/hw/nes/nes_nic.c index 70acda91e..6a0bdfa0c 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_nic.c +++ b/kernel/drivers/infiniband/hw/nes/nes_nic.c @@ -1325,9 +1325,6 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev, "%u.%u", nesadapter->firmware_version >> 16, nesadapter->firmware_version & 0x000000ff); strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); - drvinfo->testinfo_len = 0; - drvinfo->eedump_len = 0; - drvinfo->regdump_len = 0; } diff --git a/kernel/drivers/infiniband/hw/nes/nes_verbs.c b/kernel/drivers/infiniband/hw/nes/nes_verbs.c index c0d0296e7..137880a19 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_verbs.c +++ b/kernel/drivers/infiniband/hw/nes/nes_verbs.c @@ -51,6 +51,7 @@ atomic_t qps_created; atomic_t sw_qps_destroyed; static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); +static int nes_dereg_mr(struct ib_mr *ib_mr); /** * nes_alloc_mw @@ -375,9 +376,11 @@ static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, } /* - * nes_alloc_fast_reg_mr + * nes_alloc_mr */ -static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len) +static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); @@ -393,11 +396,18 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list u32 stag; int ret; struct ib_mr *ibmr; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) + return ERR_PTR(-E2BIG); + /* * Note: Set to always use a fixed length single page entry PBL. This is to allow * for the fast_reg_mr operation to always know the size of the PBL. */ - if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) + if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) return ERR_PTR(-E2BIG); get_random_bytes(&next_stag_index, sizeof(next_stag_index)); @@ -424,7 +434,7 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n", stag, stag_index); - ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len); + ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_num_sg); if (ret == 0) { nesmr->ibmr.rkey = stag; @@ -434,90 +444,61 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list } else { kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); } + + nesmr->pages = pci_alloc_consistent(nesdev->pcidev, + max_num_sg * sizeof(u64), + &nesmr->paddr); + if (!nesmr->paddr) + goto err; + + nesmr->max_pages = max_num_sg; + return ibmr; -} -/* - * nes_alloc_fast_reg_page_list - */ -static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( - struct ib_device *ibdev, - int page_list_len) -{ - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct ib_fast_reg_page_list *pifrpl; - struct nes_ib_fast_reg_page_list *pnesfrpl; +err: + nes_dereg_mr(ibmr); - if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) - return ERR_PTR(-E2BIG); - /* - * Allocate the ib_fast_reg_page_list structure, the - * nes_fast_bpl structure, and the PLB table. - */ - pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) + - page_list_len * sizeof(u64), GFP_KERNEL); + return ERR_PTR(-ENOMEM); +} - if (!pnesfrpl) - return ERR_PTR(-ENOMEM); +static int nes_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct nes_mr *nesmr = to_nesmr(ibmr); - pifrpl = &pnesfrpl->ibfrpl; - pifrpl->page_list = &pnesfrpl->pbl; - pifrpl->max_page_list_len = page_list_len; - /* - * Allocate the WQE PBL - */ - pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev, - page_list_len * sizeof(u64), - &pnesfrpl->nes_wqe_pbl.paddr); + if (unlikely(nesmr->npages == nesmr->max_pages)) + return -ENOMEM; - if (!pnesfrpl->nes_wqe_pbl.kva) { - kfree(pnesfrpl); - return ERR_PTR(-ENOMEM); - } - nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " - "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " - "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, - pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, - (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); + nesmr->pages[nesmr->npages++] = cpu_to_le64(addr); - return pifrpl; + return 0; } -/* - * nes_free_fast_reg_page_list - */ -static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) +static int nes_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_ib_fast_reg_page_list *pnesfrpl; + struct nes_mr *nesmr = to_nesmr(ibmr); - pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl); - /* - * Free the WQE PBL. - */ - pci_free_consistent(nesdev->pcidev, - pifrpl->max_page_list_len * sizeof(u64), - pnesfrpl->nes_wqe_pbl.kva, - pnesfrpl->nes_wqe_pbl.paddr); - /* - * Free the PBL structure - */ - kfree(pnesfrpl); + nesmr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); } /** * nes_query_device */ -static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_ib_device *nesibdev = nesvnic->nesibdev; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(props, 0, sizeof(*props)); memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); @@ -606,7 +587,6 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr return 0; } - /** * nes_query_pkey */ @@ -1527,10 +1507,12 @@ static int nes_destroy_qp(struct ib_qp *ibqp) /** * nes_create_cq */ -static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, - struct ib_ucontext *context, struct ib_udata *udata) +static struct ib_cq *nes_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) { + int entries = attr->cqe; u64 u64temp; struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1550,6 +1532,9 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, unsigned long flags; int ret; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries > nesadapter->max_cqe) return ERR_PTR(-EINVAL); @@ -2666,6 +2651,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) u16 major_code; u16 minor_code; + + if (nesmr->pages) + pci_free_consistent(nesdev->pcidev, + nesmr->max_pages * sizeof(u64), + nesmr->pages, + nesmr->paddr); + if (nesmr->region) { ib_umem_release(nesmr->region); } @@ -3222,8 +3214,10 @@ static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) * nes_process_mad */ static int nes_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { nes_debug(NES_DBG_INIT, "\n"); return -ENOSYS; @@ -3353,9 +3347,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); + rdma_wr(ib_wr)->rkey); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); + rdma_wr(ib_wr)->remote_addr); if ((ib_wr->send_flags & IB_SEND_INLINE) && ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && @@ -3390,9 +3384,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, } set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); + rdma_wr(ib_wr)->remote_addr); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); + rdma_wr(ib_wr)->rkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, ib_wr->sg_list->length); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, @@ -3406,19 +3400,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, ib_wr->ex.invalidate_rkey); break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: { - int i; - int flags = ib_wr->wr.fast_reg.access_flags; - struct nes_ib_fast_reg_page_list *pnesfrpl = - container_of(ib_wr->wr.fast_reg.page_list, - struct nes_ib_fast_reg_page_list, - ibfrpl); - u64 *src_page_list = pnesfrpl->ibfrpl.page_list; - u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva; - - if (ib_wr->wr.fast_reg.page_list_len > - (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { + struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr); + int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); + int flags = reg_wr(ib_wr)->access; + + if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); err = -EINVAL; break; @@ -3426,19 +3414,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc = NES_IWARP_SQ_OP_FAST_REG; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, - ib_wr->wr.fast_reg.iova_start); + mr->ibmr.iova); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, - ib_wr->wr.fast_reg.length); + mr->ibmr.length); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, - ib_wr->wr.fast_reg.rkey); - /* Set page size: */ - if (ib_wr->wr.fast_reg.page_shift == 12) { + reg_wr(ib_wr)->key); + + if (page_shift == 12) { wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; - } else if (ib_wr->wr.fast_reg.page_shift == 21) { + } else if (page_shift == 21) { wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; } else { nes_debug(NES_DBG_IW_TX, "Invalid page shift," @@ -3446,6 +3434,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, err = -EINVAL; break; } + /* Set access_flags */ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; if (flags & IB_ACCESS_LOCAL_WRITE) @@ -3461,35 +3450,22 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; /* Fill in PBL info: */ - if (ib_wr->wr.fast_reg.page_list_len > - pnesfrpl->ibfrpl.max_page_list_len) { - nes_debug(NES_DBG_IW_TX, "Invalid page list length," - " ib_wr=%p, value=%u, max=%u\n", - ib_wr, ib_wr->wr.fast_reg.page_list_len, - pnesfrpl->ibfrpl.max_page_list_len); - err = -EINVAL; - break; - } - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, - pnesfrpl->nes_wqe_pbl.paddr); + mr->paddr); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, - ib_wr->wr.fast_reg.page_list_len * 8); + mr->npages * 8); - for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) - dst_page_list[i] = cpu_to_le64(src_page_list[i]); - - nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " + nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " "length: %d, rkey: %0x, pgl_paddr: %llx, " "page_list_len: %u, wqe_misc: %x\n", - (unsigned long long) ib_wr->wr.fast_reg.iova_start, - ib_wr->wr.fast_reg.length, - ib_wr->wr.fast_reg.rkey, - (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, - ib_wr->wr.fast_reg.page_list_len, + (unsigned long long) mr->ibmr.iova, + mr->ibmr.length, + reg_wr(ib_wr)->key, + (unsigned long long) mr->paddr, + mr->npages, wqe_misc); break; } @@ -3732,7 +3708,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->opcode = IB_WC_LOCAL_INV; break; case NES_IWARP_SQ_OP_FAST_REG: - entry->opcode = IB_WC_FAST_REG_MR; + entry->opcode = IB_WC_REG_MR; break; } @@ -3828,6 +3804,22 @@ static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_ return 0; } +static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = nes_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} /** * nes_init_ofa_device @@ -3903,9 +3895,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; nesibdev->ibdev.bind_mw = nes_bind_mw; - nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr; - nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; - nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list; + nesibdev->ibdev.alloc_mr = nes_alloc_mr; + nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; nesibdev->ibdev.attach_mcast = nes_multicast_attach; nesibdev->ibdev.detach_mcast = nes_multicast_detach; @@ -3928,6 +3919,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; + nesibdev->ibdev.get_port_immutable = nes_port_immutable; return nesibdev; } diff --git a/kernel/drivers/infiniband/hw/nes/nes_verbs.h b/kernel/drivers/infiniband/hw/nes/nes_verbs.h index 309b31c31..a204b677a 100644 --- a/kernel/drivers/infiniband/hw/nes/nes_verbs.h +++ b/kernel/drivers/infiniband/hw/nes/nes_verbs.h @@ -79,6 +79,10 @@ struct nes_mr { u16 pbls_used; u8 mode; u8 pbl_4k; + __le64 *pages; + dma_addr_t paddr; + u32 max_pages; + u32 npages; }; struct nes_hw_pb { diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma.h index b396344fa..040bb8b5c 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_H__ #define __OCRDMA_H__ @@ -40,7 +55,7 @@ #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0" +#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" @@ -178,6 +193,8 @@ struct ocrdma_mr { struct ib_mr ibmr; struct ib_umem *umem; struct ocrdma_hw_mr hwmr; + u64 *pages; + u32 npages; }; struct ocrdma_stats { @@ -215,6 +232,10 @@ struct phy_info { u16 interface_type; }; +enum ocrdma_flags { + OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01 +}; + struct ocrdma_dev { struct ib_device ibdev; struct ocrdma_dev_attr attr; @@ -231,7 +252,6 @@ struct ocrdma_dev { u16 base_eqid; u16 max_eq; - union ib_gid *sgid_tbl; /* provided synchronization to sgid table for * updating gid entries triggered by notifier. */ @@ -264,7 +284,6 @@ struct ocrdma_dev { u32 hba_port_num; struct list_head entry; - struct rcu_head rcu; int id; u64 *stag_arr; u8 sl; /* service level */ @@ -272,6 +291,7 @@ struct ocrdma_dev { atomic_t update_sl; u16 pvid; u32 asic_id; + u32 flags; ulong last_stats_time; struct mutex stats_lock; /* provide synch for debugfs operations */ @@ -576,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state) (state & OCRDMA_STATE_FLAG_SYNC); } +static inline u8 ocrdma_get_ae_link_state(u32 ae_state) +{ + return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); +} + #endif diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 1554cca57..430b1350f 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index f5a5ea836..9820074be 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,13 +38,14 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <net/neighbour.h> #include <net/netevent.h> #include <rdma/ib_addr.h> #include <rdma/ib_mad.h> +#include <rdma/ib_cache.h> #include "ocrdma.h" #include "ocrdma_verbs.h" @@ -41,10 +57,9 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, union ib_gid *sgid, - int pdid, bool *isvlan) + int pdid, bool *isvlan, u16 vlan_tag) { int status = 0; - u16 vlan_tag; struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; @@ -53,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, memset(&grh, 0, sizeof(grh)); /* VLAN */ - vlan_tag = attr->vlan_id; if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; if (vlan_tag || dev->pfc_state) { @@ -100,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) { u32 *ahid_addr; - bool isvlan = false; int status; struct ocrdma_ah *ah; + bool isvlan = false; + u16 vlan_tag = 0xffff; + struct ib_gid_attr sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); union ib_gid sgid; @@ -120,18 +136,25 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (status) goto av_err; - status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid); + status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid, + &sgid_attr); if (status) { pr_err("%s(): Failed to query sgid, status = %d\n", __func__, status); goto av_conf_err; } + if (sgid_attr.ndev) { + if (is_vlan_dev(sgid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); + dev_put(sgid_attr.ndev); + } if ((pd->uctx) && (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, - attr->dmac, &attr->vlan_id); + attr->dmac, &vlan_tag, + sgid_attr.ndev->ifindex); if (status) { pr_err("%s(): Failed to resolve dmac from gid." "status = %d\n", __func__, status); @@ -139,7 +162,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) } } - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan); + status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); if (status) goto av_conf_err; @@ -204,12 +227,20 @@ int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int status; struct ocrdma_dev *dev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 726a87cf2..04a30ae67 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_AH_H__ #define __OCRDMA_AH_H__ @@ -42,7 +57,9 @@ int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_process_mad(struct ib_device *, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 47615ff33..283ca842f 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) CNA Adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <linux/sched.h> #include <linux/interrupt.h> @@ -32,6 +47,7 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_cache.h> #include "ocrdma.h" #include "ocrdma_hw.h" @@ -563,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE); cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE); + /* Request link events on this MQ. */ + cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE); cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << @@ -663,11 +681,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, int dev_event = 0; int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >> OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT; + u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK; + u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK; + + /* + * Some FW version returns wrong qp or cq ids in CQEs. + * Checking whether the IDs are valid + */ + + if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) { + if (qpid < dev->attr.max_qp) + qp = dev->qp_tbl[qpid]; + if (qp == NULL) { + pr_err("ocrdma%d:Async event - qpid %u is not valid\n", + dev->id, qpid); + return; + } + } - if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) - qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK]; - if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) - cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) { + if (cqid < dev->attr.max_cq) + cq = dev->cq_tbl[cqid]; + if (cq == NULL) { + pr_err("ocrdma%d:Async event - cqid %u is not valid\n", + dev->id, cqid); + return; + } + } memset(&ib_evt, 0, sizeof(ib_evt)); @@ -781,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, } } +static void ocrdma_process_link_state(struct ocrdma_dev *dev, + struct ocrdma_ae_mcqe *cqe) +{ + struct ocrdma_ae_lnkst_mcqe *evt; + u8 lstate; + + evt = (struct ocrdma_ae_lnkst_mcqe *)cqe; + lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn); + + if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK)) + return; + + if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT) + ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK)); +} + static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ struct ocrdma_ae_mcqe *cqe = ae_cqe; u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >> OCRDMA_AE_MCQE_EVENT_CODE_SHIFT; - - if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE) + switch (evt_code) { + case OCRDMA_ASYNC_LINK_EVE_CODE: + ocrdma_process_link_state(dev, cqe); + break; + case OCRDMA_ASYNC_RDMA_EVE_CODE: ocrdma_dispatch_ibevent(dev, cqe); - else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE) + break; + case OCRDMA_ASYNC_GRP5_EVE_CODE: ocrdma_process_grp5_aync(dev, cqe); - else + break; + default: pr_err("%s(%d) invalid evt code=0x%x\n", __func__, dev->id, evt_code); + } } static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) @@ -1325,7 +1387,8 @@ mbx_err: return status; } -int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed, + u8 *lnk_state) { int status = -ENOMEM; struct ocrdma_get_link_speed_rsp *rsp; @@ -1346,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) goto mbx_err; rsp = (struct ocrdma_get_link_speed_rsp *)cmd; - *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) - >> OCRDMA_PHY_PS_SHIFT; + if (lnk_speed) + *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) + >> OCRDMA_PHY_PS_SHIFT; + if (lnk_state) + *lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK); mbx_err: kfree(cmd); @@ -2433,6 +2499,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; union ib_gid sgid, zgid; + struct ib_gid_attr sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6]; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); @@ -2451,10 +2518,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); - status = ocrdma_query_gid(&dev->ibdev, 1, - ah_attr->grh.sgid_index, &sgid); - if (status) - return status; + + status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index, + &sgid, &sgid_attr); + if (!status && sgid_attr.ndev) { + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(sgid_attr.ndev); + } memset(&zgid, 0, sizeof(zgid)); if (!memcmp(&sgid, &zgid, sizeof(zgid))) @@ -2471,17 +2542,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - if (attr_mask & IB_QP_VID) { - vlan_id = attrs->vlan_id; - } else if (dev->pfc_state) { - vlan_id = 0; - pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", - dev->id); - pr_err("ocrdma%d:Using VLAN 0 for this connection\n", - dev->id); - } - if (vlan_id < 0x1000) { + if (vlan_id == 0xFFFF) + vlan_id = 0; + if (vlan_id || dev->pfc_state) { + if (!vlan_id) { + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index e905972fc..ebc1f442a 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) CNA Adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_HW_H__ #define __OCRDMA_HW_H__ @@ -91,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed, bool solicited, u16 cqe_popped); /* verbs specific mailbox commands */ -int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed); +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed, + u8 *lnk_st); int ocrdma_query_config(struct ocrdma_dev *, struct ocrdma_mbx_query_config *config); @@ -138,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev); void ocrdma_init_service_level(struct ocrdma_dev *); void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev); void ocrdma_free_pd_range(struct ocrdma_dev *dev); +void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate); #endif /* __OCRDMA_HW_H__ */ diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7a2b59aca..3afb40b85 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,13 +38,14 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <linux/module.h> #include <linux/idr.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_addr.h> +#include <rdma/ib_mad.h> #include <linux/netdevice.h> #include <net/addrconf.h> @@ -45,14 +61,10 @@ MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); -static LIST_HEAD(ocrdma_dev_list); -static DEFINE_SPINLOCK(ocrdma_devlist_lock); static DEFINE_IDR(ocrdma_dev_id); -static union ib_gid ocrdma_zero_sgid; - void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) { u8 mac_addr[6]; @@ -67,139 +79,28 @@ void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) guid[6] = mac_addr[4]; guid[7] = mac_addr[5]; } - -static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid) -{ - int i; - unsigned long flags; - - memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid)); - - - spin_lock_irqsave(&dev->sgid_lock, flags); - for (i = 0; i < OCRDMA_MAX_SGID; i++) { - if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid, - sizeof(union ib_gid))) { - /* found free entry */ - memcpy(&dev->sgid_tbl[i], new_sgid, - sizeof(union ib_gid)); - spin_unlock_irqrestore(&dev->sgid_lock, flags); - return true; - } else if (!memcmp(&dev->sgid_tbl[i], new_sgid, - sizeof(union ib_gid))) { - /* entry already present, no addition is required. */ - spin_unlock_irqrestore(&dev->sgid_lock, flags); - return false; - } - } - spin_unlock_irqrestore(&dev->sgid_lock, flags); - return false; -} - -static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid) -{ - int found = false; - int i; - unsigned long flags; - - - spin_lock_irqsave(&dev->sgid_lock, flags); - /* first is default sgid, which cannot be deleted. */ - for (i = 1; i < OCRDMA_MAX_SGID; i++) { - if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) { - /* found matching entry */ - memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid)); - found = true; - break; - } - } - spin_unlock_irqrestore(&dev->sgid_lock, flags); - return found; -} - -static int ocrdma_addr_event(unsigned long event, struct net_device *netdev, - union ib_gid *gid) -{ - struct ib_event gid_event; - struct ocrdma_dev *dev; - bool found = false; - bool updated = false; - bool is_vlan = false; - - is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN; - if (is_vlan) - netdev = rdma_vlan_dev_real_dev(netdev); - - rcu_read_lock(); - list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) { - if (dev->nic_info.netdev == netdev) { - found = true; - break; - } - } - rcu_read_unlock(); - - if (!found) - return NOTIFY_DONE; - - mutex_lock(&dev->dev_lock); - switch (event) { - case NETDEV_UP: - updated = ocrdma_add_sgid(dev, gid); - break; - case NETDEV_DOWN: - updated = ocrdma_del_sgid(dev, gid); - break; - default: - break; - } - if (updated) { - /* GID table updated, notify the consumers about it */ - gid_event.device = &dev->ibdev; - gid_event.element.port_num = 1; - gid_event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&gid_event); - } - mutex_unlock(&dev->dev_lock); - return NOTIFY_OK; -} - -static int ocrdma_inetaddr_event(struct notifier_block *notifier, - unsigned long event, void *ptr) +static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device, + u8 port_num) { - struct in_ifaddr *ifa = ptr; - union ib_gid gid; - struct net_device *netdev = ifa->ifa_dev->dev; - - ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); - return ocrdma_addr_event(event, netdev, &gid); + return IB_LINK_LAYER_ETHERNET; } -static struct notifier_block ocrdma_inetaddr_notifier = { - .notifier_call = ocrdma_inetaddr_event -}; - -#if IS_ENABLED(CONFIG_IPV6) - -static int ocrdma_inet6addr_event(struct notifier_block *notifier, - unsigned long event, void *ptr) +static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) { - struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; - union ib_gid *gid = (union ib_gid *)&ifa->addr; - struct net_device *netdev = ifa->idev->dev; - return ocrdma_addr_event(event, netdev, gid); -} + struct ib_port_attr attr; + int err; -static struct notifier_block ocrdma_inet6addr_notifier = { - .notifier_call = ocrdma_inet6addr_event -}; + err = ocrdma_query_port(ibdev, port_num, &attr); + if (err) + return err; -#endif /* IPV6 and VLAN */ + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; -static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device, - u8 port_num) -{ - return IB_LINK_LAYER_ETHERNET; + return 0; } static int ocrdma_register_device(struct ocrdma_dev *dev) @@ -246,6 +147,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.query_port = ocrdma_query_port; dev->ibdev.modify_port = ocrdma_modify_port; dev->ibdev.query_gid = ocrdma_query_gid; + dev->ibdev.get_netdev = ocrdma_get_netdev; + dev->ibdev.add_gid = ocrdma_add_gid; + dev->ibdev.del_gid = ocrdma_del_gid; dev->ibdev.get_link_layer = ocrdma_link_layer; dev->ibdev.alloc_pd = ocrdma_alloc_pd; dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; @@ -275,9 +179,8 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.dereg_mr = ocrdma_dereg_mr; dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; - dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr; - dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; - dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list; + dev->ibdev.alloc_mr = ocrdma_alloc_mr; + dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; /* mandatory to support user space verbs consumer. */ dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; @@ -286,6 +189,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.dma_device = &dev->nic_info.pdev->dev; dev->ibdev.process_mad = ocrdma_process_mad; + dev->ibdev.get_port_immutable = ocrdma_port_immutable; if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -307,12 +211,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) static int ocrdma_alloc_resources(struct ocrdma_dev *dev) { mutex_init(&dev->dev_lock); - dev->sgid_tbl = kzalloc(sizeof(union ib_gid) * - OCRDMA_MAX_SGID, GFP_KERNEL); - if (!dev->sgid_tbl) - goto alloc_err; - spin_lock_init(&dev->sgid_lock); - dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) * OCRDMA_MAX_CQ, GFP_KERNEL); if (!dev->cq_tbl) @@ -344,7 +242,6 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev) kfree(dev->stag_arr); kfree(dev->qp_tbl); kfree(dev->cq_tbl); - kfree(dev->sgid_tbl); } /* OCRDMA sysfs interface */ @@ -390,71 +287,10 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); } -static void ocrdma_add_default_sgid(struct ocrdma_dev *dev) -{ - /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */ - union ib_gid *sgid = &dev->sgid_tbl[0]; - - sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); - ocrdma_get_guid(dev, &sgid->raw[8]); -} - -static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev, - struct net_device *net) -{ - struct in_device *in_dev; - union ib_gid gid; - in_dev = in_dev_get(net); - if (in_dev) { - for_ifa(in_dev) { - ipv6_addr_set_v4mapped(ifa->ifa_address, - (struct in6_addr *)&gid); - ocrdma_add_sgid(dev, &gid); - } - endfor_ifa(in_dev); - in_dev_put(in_dev); - } -} - -static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev, - struct net_device *net) -{ -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_dev *in6_dev; - union ib_gid *pgid; - struct inet6_ifaddr *ifp; - in6_dev = in6_dev_get(net); - if (in6_dev) { - read_lock_bh(&in6_dev->lock); - list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - pgid = (union ib_gid *)&ifp->addr; - ocrdma_add_sgid(dev, pgid); - } - read_unlock_bh(&in6_dev->lock); - in6_dev_put(in6_dev); - } -#endif -} - -static void ocrdma_init_gid_table(struct ocrdma_dev *dev) -{ - struct net_device *net_dev; - - for_each_netdev(&init_net, net_dev) { - struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ? - rdma_vlan_dev_real_dev(net_dev) : net_dev; - - if (real_dev == dev->nic_info.netdev) { - ocrdma_add_default_sgid(dev); - ocrdma_init_ipv4_gids(dev, net_dev); - ocrdma_init_ipv6_gids(dev, net_dev); - } - } -} - static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { int status = 0, i; + u8 lstate = 0; struct ocrdma_dev *dev; dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); @@ -480,17 +316,18 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) goto alloc_err; ocrdma_init_service_level(dev); - ocrdma_init_gid_table(dev); status = ocrdma_register_device(dev); if (status) goto alloc_err; + /* Query Link state and update */ + status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate); + if (!status) + ocrdma_update_link_state(dev, lstate); + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) goto sysfs_err; - spin_lock(&ocrdma_devlist_lock); - list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); - spin_unlock(&ocrdma_devlist_lock); /* Init stats */ ocrdma_add_port_stats(dev); /* Interrupt Moderation */ @@ -519,9 +356,8 @@ idr_err: return NULL; } -static void ocrdma_remove_free(struct rcu_head *rcu) +static void ocrdma_remove_free(struct ocrdma_dev *dev) { - struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); idr_remove(&ocrdma_dev_id, dev->id); kfree(dev->mbx_cmd); @@ -538,18 +374,12 @@ static void ocrdma_remove(struct ocrdma_dev *dev) ib_unregister_device(&dev->ibdev); ocrdma_rem_port_stats(dev); - - spin_lock(&ocrdma_devlist_lock); - list_del_rcu(&dev->entry); - spin_unlock(&ocrdma_devlist_lock); - ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); - - call_rcu(&dev->rcu, ocrdma_remove_free); + ocrdma_remove_free(dev); } -static int ocrdma_open(struct ocrdma_dev *dev) +static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev) { struct ib_event port_event; @@ -560,32 +390,9 @@ static int ocrdma_open(struct ocrdma_dev *dev) return 0; } -static int ocrdma_close(struct ocrdma_dev *dev) +static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev) { - int i; - struct ocrdma_qp *qp, **cur_qp; struct ib_event err_event; - struct ib_qp_attr attrs; - int attr_mask = IB_QP_STATE; - - attrs.qp_state = IB_QPS_ERR; - mutex_lock(&dev->dev_lock); - if (dev->qp_tbl) { - cur_qp = dev->qp_tbl; - for (i = 0; i < OCRDMA_MAX_QP; i++) { - qp = cur_qp[i]; - if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { - /* change the QP state to ERROR */ - _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); - - err_event.event = IB_EVENT_QP_FATAL; - err_event.element.qp = &qp->ibqp; - err_event.device = &dev->ibdev; - ib_dispatch_event(&err_event); - } - } - } - mutex_unlock(&dev->dev_lock); err_event.event = IB_EVENT_PORT_ERR; err_event.element.port_num = 1; @@ -596,7 +403,7 @@ static int ocrdma_close(struct ocrdma_dev *dev) static void ocrdma_shutdown(struct ocrdma_dev *dev) { - ocrdma_close(dev); + ocrdma_dispatch_port_error(dev); ocrdma_remove(dev); } @@ -607,16 +414,26 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev) static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) { switch (event) { - case BE_DEV_UP: - ocrdma_open(dev); - break; - case BE_DEV_DOWN: - ocrdma_close(dev); - break; case BE_DEV_SHUTDOWN: ocrdma_shutdown(dev); break; + default: + break; + } +} + +void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate) +{ + if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) { + dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT; + if (!lstate) + return; } + + if (!lstate) + ocrdma_dispatch_port_error(dev); + else + ocrdma_dispatch_port_active(dev); } static struct ocrdma_driver ocrdma_drv = { @@ -627,34 +444,12 @@ static struct ocrdma_driver ocrdma_drv = { .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION, }; -static void ocrdma_unregister_inet6addr_notifier(void) -{ -#if IS_ENABLED(CONFIG_IPV6) - unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier); -#endif -} - -static void ocrdma_unregister_inetaddr_notifier(void) -{ - unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier); -} - static int __init ocrdma_init_module(void) { int status; ocrdma_init_debugfs(); - status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier); - if (status) - return status; - -#if IS_ENABLED(CONFIG_IPV6) - status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); - if (status) - goto err_notifier6; -#endif - status = be_roce_register_driver(&ocrdma_drv); if (status) goto err_be_reg; @@ -662,20 +457,15 @@ static int __init ocrdma_init_module(void) return 0; err_be_reg: -#if IS_ENABLED(CONFIG_IPV6) - ocrdma_unregister_inet6addr_notifier(); -err_notifier6: -#endif - ocrdma_unregister_inetaddr_notifier(); + return status; } static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); - ocrdma_unregister_inet6addr_notifier(); - ocrdma_unregister_inetaddr_notifier(); ocrdma_rem_debugfs(); + idr_destroy(&ocrdma_dev_id); } module_init(ocrdma_init_module); diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 02ad0aee9..99dd6fdf0 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_SLI_H__ #define __OCRDMA_SLI_H__ @@ -125,6 +140,8 @@ enum { OCRDMA_DB_RQ_SHIFT = 24 }; +#define OCRDMA_ROUDP_FLAGS_SHIFT 0x03 + #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ /* qid #2 msbits at 12-11 */ @@ -448,8 +465,11 @@ struct ocrdma_ae_qp_mcqe { u32 valid_ae_event; }; -#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 -#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 +enum ocrdma_async_event_code { + OCRDMA_ASYNC_LINK_EVE_CODE = 0x01, + OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05, + OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14 +}; enum ocrdma_async_grp5_events { OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01, @@ -472,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_MAX_ASYNC_ERRORS }; +struct ocrdma_ae_lnkst_mcqe { + u32 speed_state_ptn; + u32 qos_reason_falut; + u32 evt_tag; + u32 valid_ae_event; +}; + +enum { + OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F, + OCRDMA_AE_LSC_PT_SHIFT = 0x06, + OCRDMA_AE_LSC_PT_MASK = (0x03 << + OCRDMA_AE_LSC_PT_SHIFT), + OCRDMA_AE_LSC_LS_SHIFT = 0x08, + OCRDMA_AE_LSC_LS_MASK = (0xFF << + OCRDMA_AE_LSC_LS_SHIFT), + OCRDMA_AE_LSC_LD_SHIFT = 0x10, + OCRDMA_AE_LSC_LD_MASK = (0xFF << + OCRDMA_AE_LSC_LD_SHIFT), + OCRDMA_AE_LSC_PPS_SHIFT = 0x18, + OCRDMA_AE_LSC_PPS_MASK = (0xFF << + OCRDMA_AE_LSC_PPS_SHIFT), + OCRDMA_AE_LSC_PPF_MASK = 0xFF, + OCRDMA_AE_LSC_ER_SHIFT = 0x08, + OCRDMA_AE_LSC_ER_MASK = (0xFF << + OCRDMA_AE_LSC_ER_SHIFT), + OCRDMA_AE_LSC_QOS_SHIFT = 0x10, + OCRDMA_AE_LSC_QOS_MASK = (0xFFFF << + OCRDMA_AE_LSC_QOS_SHIFT) +}; + +enum { + OCRDMA_AE_LSC_PLINK_DOWN = 0x00, + OCRDMA_AE_LSC_PLINK_UP = 0x01, + OCRDMA_AE_LSC_LLINK_DOWN = 0x02, + OCRDMA_AE_LSC_LLINK_MASK = 0x02, + OCRDMA_AE_LSC_LLINK_UP = 0x03 +}; + /* mailbox command request and responses */ enum { OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, @@ -659,7 +717,7 @@ enum { OCRDMA_PHY_PFLT_SHIFT = 0x18, OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000, OCRDMA_QOS_LNKSP_SHIFT = 0x10, - OCRDMA_LLST_MASK = 0xFF, + OCRDMA_LINK_ST_MASK = 0x01, OCRDMA_PLFC_MASK = 0x00000400, OCRDMA_PLFC_SHIFT = 0x8, OCRDMA_PLRFC_MASK = 0x00000200, @@ -674,7 +732,7 @@ struct ocrdma_get_link_speed_rsp { u32 pflt_pps_ld_pnum; u32 qos_lsp; - u32 res_lls; + u32 res_lnk_st; }; enum { diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 48d7ef51a..86c303a62 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2014 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <rdma/ib_addr.h> #include <rdma/ib_pma.h> @@ -840,9 +855,9 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev) { if (!dev->dir) return; + debugfs_remove(dev->dir); mutex_destroy(&dev->stats_lock); ocrdma_release_stats_mem(dev); - debugfs_remove(dev->dir); } void ocrdma_init_debugfs(void) diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index 091edd68a..c9e58d04c 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2014 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_STATS_H__ #define __OCRDMA_STATS_H__ diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 219f2122f..76e96f97b 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <linux/dma-mapping.h> #include <rdma/ib_verbs.h> @@ -31,6 +46,7 @@ #include <rdma/iw_cm.h> #include <rdma/ib_umem.h> #include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include "ocrdma.h" #include "ocrdma_hw.h" @@ -49,6 +65,7 @@ int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) int ocrdma_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *sgid) { + int ret; struct ocrdma_dev *dev; dev = get_ocrdma_dev(ibdev); @@ -56,15 +73,39 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, if (index >= OCRDMA_MAX_SGID) return -EINVAL; - memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); + ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); + if (ret == -EAGAIN) { + memcpy(sgid, &zgid, sizeof(*sgid)); + return 0; + } + + return ret; +} + +int ocrdma_add_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context) { + return 0; +} +int ocrdma_del_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + void **context) { return 0; } -int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *uhw) { struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(attr, 0, sizeof *attr); memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); @@ -106,13 +147,31 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) return 0; } +struct net_device *ocrdma_get_netdev(struct ib_device *ibdev, u8 port_num) +{ + struct ocrdma_dev *dev; + struct net_device *ndev = NULL; + + rcu_read_lock(); + + dev = get_ocrdma_dev(ibdev); + if (dev) + ndev = dev->nic_info.netdev; + if (ndev) + dev_hold(ndev); + + rcu_read_unlock(); + + return ndev; +} + static inline void get_link_speed_and_width(struct ocrdma_dev *dev, u8 *ib_speed, u8 *ib_width) { int status; u8 speed; - status = ocrdma_mbx_get_link_speed(dev, &speed); + status = ocrdma_mbx_get_link_speed(dev, &speed, NULL); if (status) speed = OCRDMA_PHYS_LINK_SPEED_ZERO; @@ -175,7 +234,8 @@ int ocrdma_query_port(struct ib_device *ibdev, props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS; + IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | + IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = OCRDMA_MAX_SGID; props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; @@ -375,7 +435,12 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (dev->pd_mgr->pd_prealloc_valid) { status = ocrdma_get_pd_num(dev, pd); - return (status == 0) ? pd : ERR_PTR(status); + if (status == 0) { + return pd; + } else { + kfree(pd); + return ERR_PTR(status); + } } retry: @@ -948,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); + kfree(mr->pages); ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. */ @@ -999,10 +1065,12 @@ err: return status; } -struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_ctx, struct ib_udata *udata) { + int entries = attr->cqe; struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_ucontext *uctx = NULL; @@ -1010,6 +1078,9 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, int status; struct ocrdma_create_cq_ureq ureq; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return ERR_PTR(-EFAULT); @@ -1927,13 +1998,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); - struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); + struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah); - ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; + ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn; if (qp->qp_type == IB_QPT_GSI) ud_hdr->qkey = qp->qkey; else - ud_hdr->qkey = wr->wr.ud.remote_qkey; + ud_hdr->qkey = ud_wr(wr)->remote_qkey; ud_hdr->rsvd_ahid = ah->id; if (ah->av->valid & OCRDMA_AV_VLAN_VALID) hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); @@ -2036,9 +2107,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); if (status) return status; - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; return 0; } @@ -2056,46 +2127,12 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; } -static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl, - struct ocrdma_hw_mr *hwmr) -{ - int i; - u64 buf_addr = 0; - int num_pbes; - struct ocrdma_pbe *pbe; - - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - num_pbes = 0; - - /* go through the OS phy regions & fill hw pbe entries into pbls. */ - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - /* number of pbes can be more for one OS buf, when - * buffers are of different sizes. - * split the ib_buf to one or more pbes. - */ - buf_addr = wr->wr.fast_reg.page_list->page_list[i]; - pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); - pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); - num_pbes += 1; - pbe++; - - /* if the pbl is full storing the pbes, - * move to next pbl. - */ - if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - } - } - return; -} - static int get_encoded_page_size(int pg_sz) { /* Max size is 256M 4096 << 16 */ @@ -2106,48 +2143,59 @@ static int get_encoded_page_size(int pg_sz) return i; } - -static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) +static int ocrdma_build_reg(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ib_reg_wr *wr) { u64 fbo; struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); - struct ocrdma_mr *mr; - struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); + struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr); + struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; + struct ocrdma_pbe *pbe; u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); + int num_pbes = 0, i; wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); - if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr) - return -EINVAL; - hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); - if (wr->wr.fast_reg.page_list_len == 0) - BUG(); - if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE) + if (wr->access & IB_ACCESS_LOCAL_WRITE) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; - if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE) + if (wr->access & IB_ACCESS_REMOTE_WRITE) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; - if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ) + if (wr->access & IB_ACCESS_REMOTE_READ) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; - hdr->lkey = wr->wr.fast_reg.rkey; - hdr->total_len = wr->wr.fast_reg.length; + hdr->lkey = wr->key; + hdr->total_len = mr->ibmr.length; - fbo = wr->wr.fast_reg.iova_start - - (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + fbo = mr->ibmr.iova - mr->pages[0]; - fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start); - fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff); + fast_reg->va_hi = upper_32_bits(mr->ibmr.iova); + fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff); fast_reg->fbo_hi = upper_32_bits(fbo); fast_reg->fbo_lo = (u32) fbo & 0xffffffff; - fast_reg->num_sges = wr->wr.fast_reg.page_list_len; - fast_reg->size_sge = - get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *) (unsigned long) - dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; - build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); + fast_reg->num_sges = mr->npages; + fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size); + + pbe = pbl_tbl->va; + for (i = 0; i < mr->npages; i++) { + u64 buf_addr = mr->pages[i]; + + pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); + num_pbes += 1; + pbe++; + + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + } + } + return 0; } @@ -2230,8 +2278,8 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; hdr->lkey = wr->ex.invalidate_rkey; break; - case IB_WR_FAST_REG_MR: - status = ocrdma_build_fr(qp, hdr, wr); + case IB_WR_REG_MR: + status = ocrdma_build_reg(qp, hdr, reg_wr(wr)); break; default: status = -EINVAL; @@ -2497,7 +2545,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, ibwc->opcode = IB_WC_SEND; break; case OCRDMA_FR_MR: - ibwc->opcode = IB_WC_FAST_REG_MR; + ibwc->opcode = IB_WC_REG_MR; break; case OCRDMA_LKEY_INV: ibwc->opcode = IB_WC_LOCAL_INV; @@ -2863,16 +2911,11 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (cq->deferred_arm) { - ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, - polled_hw_cqes); + if (cq->deferred_arm || polled_hw_cqes) { + ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm, + cq->deferred_sol, polled_hw_cqes); cq->deferred_arm = false; cq->deferred_sol = false; - } else { - /* We need to pop the CQE. No need to arm */ - ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, - polled_hw_cqes); - cq->deferred_sol = false; } return i; @@ -2969,21 +3012,32 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) return 0; } -struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, + u32 max_num_sg) { int status; struct ocrdma_mr *mr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - if (max_page_list_len > dev->attr.max_pages_per_frmr) + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + if (max_num_sg > dev->attr.max_pages_per_frmr) return ERR_PTR(-EINVAL); mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - status = ocrdma_get_pbl_info(dev, mr, max_page_list_len); + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) { + status = -ENOMEM; + goto pl_err; + } + + status = ocrdma_get_pbl_info(dev, mr, max_num_sg); if (status) goto pbl_err; mr->hwmr.fr_mr = 1; @@ -3006,30 +3060,12 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); pbl_err: + kfree(mr->pages); +pl_err: kfree(mr); return ERR_PTR(-ENOMEM); } -struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device - *ibdev, - int page_list_len) -{ - struct ib_fast_reg_page_list *frmr_list; - int size; - - size = sizeof(*frmr_list) + (page_list_len * sizeof(u64)); - frmr_list = kzalloc(size, GFP_KERNEL); - if (!frmr_list) - return ERR_PTR(-ENOMEM); - frmr_list->page_list = (u64 *)(frmr_list + 1); - return frmr_list; -} - -void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) -{ - kfree(page_list); -} - #define MAX_KERNEL_PBE_SIZE 65536 static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, int buf_cnt, u32 *pbe_size) @@ -3192,3 +3228,26 @@ pbl_err: kfree(mr); return ERR_PTR(status); } + +static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); + + if (unlikely(mr->npages == mr->hwmr.num_pbes)) + return -ENOMEM; + + mr->pages[mr->npages++] = addr; + + return 0; +} + +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); + + mr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); +} diff --git a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b8f7853fd..a2f3b4dc2 100644 --- a/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/kernel/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ @@ -36,14 +51,29 @@ int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); -int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props); +int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, + struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); int ocrdma_modify_port(struct ib_device *, u8 port, int mask, struct ib_port_modify *props); +enum rdma_protocol_type +ocrdma_query_protocol(struct ib_device *device, u8 port_num); + void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); int ocrdma_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid); +struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num); +int ocrdma_add_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context); +int ocrdma_del_gid(struct ib_device *device, + u8 port_num, + unsigned int index, + void **context); int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *, @@ -56,8 +86,10 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *, struct ib_ucontext *, struct ib_udata *); int ocrdma_dealloc_pd(struct ib_pd *pd); -struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector, - struct ib_ucontext *, struct ib_udata *); +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int ocrdma_destroy_cq(struct ib_cq *); @@ -90,10 +122,11 @@ struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *, int num_phys_buf, int acc, u64 *iova_start); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); -struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len); -struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device - *ibdev, - int page_list_len); -void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); #endif /* __OCRDMA_VERBS_H__ */ diff --git a/kernel/drivers/infiniband/hw/qib/qib_cq.c b/kernel/drivers/infiniband/hw/qib/qib_cq.c index ab4e11cfa..2b45d0b02 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_cq.c +++ b/kernel/drivers/infiniband/hw/qib/qib_cq.c @@ -203,7 +203,7 @@ static void send_complete(struct kthread_work *work) /** * qib_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to - * @entries: the minimum size of the completion queue + * @attr: creation attributes * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * @@ -212,16 +212,21 @@ static void send_complete(struct kthread_work *work) * * Called by ib_create_cq() in the generic verbs code. */ -struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, struct ib_ucontext *context, +struct ib_cq *qib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct qib_ibdev *dev = to_idev(ibdev); struct qib_cq *cq; struct qib_cq_wc *wc; struct ib_cq *ret; u32 sz; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > ib_qib_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/kernel/drivers/infiniband/hw/qib/qib_file_ops.c b/kernel/drivers/infiniband/hw/qib/qib_file_ops.c index 725881890..e449e3949 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/kernel/drivers/infiniband/hw/qib/qib_file_ops.c @@ -908,7 +908,7 @@ static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return 0; } -static struct vm_operations_struct qib_file_vm_ops = { +static const struct vm_operations_struct qib_file_vm_ops = { .fault = qib_file_vma_fault, }; diff --git a/kernel/drivers/infiniband/hw/qib/qib_fs.c b/kernel/drivers/infiniband/hw/qib/qib_fs.c index bdd5d3857..13ef22bd9 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_fs.c +++ b/kernel/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(d_inode(parent), tmp); diff --git a/kernel/drivers/infiniband/hw/qib/qib_iba7322.c b/kernel/drivers/infiniband/hw/qib/qib_iba7322.c index f32b4628e..6c8ff1010 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/kernel/drivers/infiniband/hw/qib/qib_iba7322.c @@ -5502,7 +5502,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) goto retry; diff --git a/kernel/drivers/infiniband/hw/qib/qib_init.c b/kernel/drivers/infiniband/hw/qib/qib_init.c index 7e00470ad..4ff340fe9 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_init.c +++ b/kernel/drivers/infiniband/hw/qib/qib_init.c @@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; egrcnt = rcd->rcvegrcnt; egroff = rcd->rcvegr_tid_base; diff --git a/kernel/drivers/infiniband/hw/qib/qib_keys.c b/kernel/drivers/infiniband/hw/qib/qib_keys.c index 5afaa2185..d725c5655 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_keys.c +++ b/kernel/drivers/infiniband/hw/qib/qib_keys.c @@ -336,14 +336,15 @@ bail: } /* - * Initialize the memory region specified by the work reqeust. + * Initialize the memory region specified by the work request. */ -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) +int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr) { struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct qib_pd *pd = to_ipd(qp->ibqp.pd); - struct qib_mregion *mr; - u32 rkey = wr->wr.fast_reg.rkey; + struct qib_mr *mr = to_imr(wr->mr); + struct qib_mregion *mrg; + u32 key = wr->key; unsigned i, n, m; int ret = -EINVAL; unsigned long flags; @@ -351,33 +352,33 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) size_t ps; spin_lock_irqsave(&rkt->lock, flags); - if (pd->user || rkey == 0) + if (pd->user || key == 0) goto bail; - mr = rcu_dereference_protected( - rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + mrg = rcu_dereference_protected( + rkt->table[(key >> (32 - ib_qib_lkey_table_size))], lockdep_is_held(&rkt->lock)); - if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) + if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd)) goto bail; - if (wr->wr.fast_reg.page_list_len > mr->max_segs) + if (mr->npages > mrg->max_segs) goto bail; - ps = 1UL << wr->wr.fast_reg.page_shift; - if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) + ps = mr->ibmr.page_size; + if (mr->ibmr.length > ps * mr->npages) goto bail; - mr->user_base = wr->wr.fast_reg.iova_start; - mr->iova = wr->wr.fast_reg.iova_start; - mr->lkey = rkey; - mr->length = wr->wr.fast_reg.length; - mr->access_flags = wr->wr.fast_reg.access_flags; - page_list = wr->wr.fast_reg.page_list->page_list; + mrg->user_base = mr->ibmr.iova; + mrg->iova = mr->ibmr.iova; + mrg->lkey = key; + mrg->length = mr->ibmr.length; + mrg->access_flags = wr->access; + page_list = mr->pages; m = 0; n = 0; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - mr->map[m]->segs[n].vaddr = (void *) page_list[i]; - mr->map[m]->segs[n].length = ps; + for (i = 0; i < mr->npages; i++) { + mrg->map[m]->segs[n].vaddr = (void *) page_list[i]; + mrg->map[m]->segs[n].length = ps; if (++n == QIB_SEGSZ) { m++; n = 0; diff --git a/kernel/drivers/infiniband/hw/qib/qib_mad.c b/kernel/drivers/infiniband/hw/qib/qib_mad.c index 395f4046d..9625e7c43 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_mad.c +++ b/kernel/drivers/infiniband/hw/qib/qib_mad.c @@ -83,7 +83,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) return; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; @@ -1854,7 +1855,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, } static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port, struct ib_mad *in_mad, + u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_smp *smp = (struct ib_smp *)out_mad; @@ -2006,7 +2007,7 @@ bail: } static int process_perf(struct ib_device *ibdev, u8 port, - struct ib_mad *in_mad, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; @@ -2299,7 +2300,7 @@ static int check_cc_key(struct qib_ibport *ibp, } static int process_cc(struct ib_device *ibdev, int mad_flags, - u8 port, struct ib_mad *in_mad, + u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; @@ -2400,12 +2401,20 @@ bail: * This is called by the ib_mad module. */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/kernel/drivers/infiniband/hw/qib/qib_mad.h b/kernel/drivers/infiniband/hw/qib/qib_mad.h index 941d4d50d..57e99dc0d 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_mad.h +++ b/kernel/drivers/infiniband/hw/qib/qib_mad.h @@ -36,148 +36,17 @@ #include <rdma/ib_pma.h> -#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) -#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) -#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) +#define IB_SMP_UNSUP_VERSION \ +cpu_to_be16(IB_MGMT_MAD_STATUS_BAD_VERSION) -struct ib_node_info { - u8 base_version; - u8 class_version; - u8 node_type; - u8 num_ports; - __be64 sys_guid; - __be64 node_guid; - __be64 port_guid; - __be16 partition_cap; - __be16 device_id; - __be32 revision; - u8 local_port_num; - u8 vendor_id[3]; -} __packed; - -struct ib_mad_notice_attr { - u8 generic_type; - u8 prod_type_msb; - __be16 prod_type_lsb; - __be16 trap_num; - __be16 issuer_lid; - __be16 toggle_count; - - union { - struct { - u8 details[54]; - } raw_data; - - struct { - __be16 reserved; - __be16 lid; /* where violation happened */ - u8 port_num; /* where violation happened */ - } __packed ntc_129_131; - - struct { - __be16 reserved; - __be16 lid; /* LID where change occurred */ - u8 reserved2; - u8 local_changes; /* low bit - local changes */ - __be32 new_cap_mask; /* new capability mask */ - u8 reserved3; - u8 change_flags; /* low 3 bits only */ - } __packed ntc_144; - - struct { - __be16 reserved; - __be16 lid; /* lid where sys guid changed */ - __be16 reserved2; - __be64 new_sys_guid; - } __packed ntc_145; - - struct { - __be16 reserved; - __be16 lid; - __be16 dr_slid; - u8 method; - u8 reserved2; - __be16 attr_id; - __be32 attr_mod; - __be64 mkey; - u8 reserved3; - u8 dr_trunc_hop; - u8 dr_rtn_path[30]; - } __packed ntc_256; - - struct { - __be16 reserved; - __be16 lid1; - __be16 lid2; - __be32 key; - __be32 sl_qp1; /* SL: high 4 bits */ - __be32 qp2; /* high 8 bits reserved */ - union ib_gid gid1; - union ib_gid gid2; - } __packed ntc_257_258; - - } details; -}; - -/* - * Generic trap/notice types - */ -#define IB_NOTICE_TYPE_FATAL 0x80 -#define IB_NOTICE_TYPE_URGENT 0x81 -#define IB_NOTICE_TYPE_SECURITY 0x82 -#define IB_NOTICE_TYPE_SM 0x83 -#define IB_NOTICE_TYPE_INFO 0x84 +#define IB_SMP_UNSUP_METHOD \ +cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD) -/* - * Generic trap/notice producers - */ -#define IB_NOTICE_PROD_CA cpu_to_be16(1) -#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) -#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) -#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) +#define IB_SMP_UNSUP_METH_ATTR \ +cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB) -/* - * Generic trap/notice numbers - */ -#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) -#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) -#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) -#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) -#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) -#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) -#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) -#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) - -/* - * Repress trap/notice flags - */ -#define IB_NOTICE_REPRESS_LLI_THRESH (1 << 0) -#define IB_NOTICE_REPRESS_EBO_THRESH (1 << 1) -#define IB_NOTICE_REPRESS_FLOW_UPDATE (1 << 2) -#define IB_NOTICE_REPRESS_CAP_MASK_CHG (1 << 3) -#define IB_NOTICE_REPRESS_SYS_GUID_CHG (1 << 4) -#define IB_NOTICE_REPRESS_BAD_MKEY (1 << 5) -#define IB_NOTICE_REPRESS_BAD_PKEY (1 << 6) -#define IB_NOTICE_REPRESS_BAD_QKEY (1 << 7) - -/* - * Generic trap/notice other local changes flags (trap 144). - */ -#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ -#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ -#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 - -/* - * Generic trap/notice M_Key volation flags in dr_trunc_hop (trap 256). - */ -#define IB_NOTICE_TRAP_DR_NOTICE 0x80 -#define IB_NOTICE_TRAP_DR_TRUNC 0x40 - -struct ib_vl_weight_elem { - u8 vl; /* Only low 4 bits, upper 4 bits reserved */ - u8 weight; -}; +#define IB_SMP_INVALID_FIELD \ +cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE) #define IB_VLARB_LOWPRI_0_31 1 #define IB_VLARB_LOWPRI_32_63 2 diff --git a/kernel/drivers/infiniband/hw/qib/qib_mmap.c b/kernel/drivers/infiniband/hw/qib/qib_mmap.c index 146cf29a2..34927b700 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_mmap.c +++ b/kernel/drivers/infiniband/hw/qib/qib_mmap.c @@ -75,7 +75,7 @@ static void qib_vma_close(struct vm_area_struct *vma) kref_put(&ip->ref, qib_release_mmap_info); } -static struct vm_operations_struct qib_vm_ops = { +static const struct vm_operations_struct qib_vm_ops = { .open = qib_vma_open, .close = qib_vma_close, }; diff --git a/kernel/drivers/infiniband/hw/qib/qib_mr.c b/kernel/drivers/infiniband/hw/qib/qib_mr.c index c4473db46..294f5c706 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_mr.c +++ b/kernel/drivers/infiniband/hw/qib/qib_mr.c @@ -303,6 +303,7 @@ int qib_dereg_mr(struct ib_mr *ibmr) int ret = 0; unsigned long timeout; + kfree(mr->pages); qib_free_lkey(&mr->mr); qib_put_mr(&mr->mr); /* will set completion if last */ @@ -323,49 +324,55 @@ out: /* * Allocate a memory region usable with the - * IB_WR_FAST_REG_MR send work request. + * IB_WR_REG_MR send work request. * * Return the memory region on success, otherwise return an errno. */ -struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) +struct ib_mr *qib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct qib_mr *mr; - mr = alloc_mr(max_page_list_len, pd); + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = alloc_mr(max_num_sg, pd); if (IS_ERR(mr)) return (struct ib_mr *)mr; + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) + goto err; + return &mr->ibmr; + +err: + qib_dereg_mr(&mr->ibmr); + return ERR_PTR(-ENOMEM); } -struct ib_fast_reg_page_list * -qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) +static int qib_set_page(struct ib_mr *ibmr, u64 addr) { - unsigned size = page_list_len * sizeof(u64); - struct ib_fast_reg_page_list *pl; - - if (size > PAGE_SIZE) - return ERR_PTR(-EINVAL); - - pl = kzalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) - return ERR_PTR(-ENOMEM); + struct qib_mr *mr = to_imr(ibmr); - pl->page_list = kzalloc(size, GFP_KERNEL); - if (!pl->page_list) - goto err_free; + if (unlikely(mr->npages == mr->mr.max_segs)) + return -ENOMEM; - return pl; + mr->pages[mr->npages++] = addr; -err_free: - kfree(pl); - return ERR_PTR(-ENOMEM); + return 0; } -void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) +int qib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) { - kfree(pl->page_list); - kfree(pl); + struct qib_mr *mr = to_imr(ibmr); + + mr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page); } /** diff --git a/kernel/drivers/infiniband/hw/qib/qib_qp.c b/kernel/drivers/infiniband/hw/qib/qib_qp.c index 4fa88ba29..3eff35c2d 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_qp.c +++ b/kernel/drivers/infiniband/hw/qib/qib_qp.c @@ -100,9 +100,10 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct qpn_map *map; @@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -436,7 +437,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } @@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, size_t sz; size_t sg_list_sz; struct ib_qp *ret; + gfp_t gfp; + if (init_attr->cap.max_send_sge > ib_qib_max_sges || init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || - init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + return ERR_PTR(-EINVAL); + + /* GFP_NOIO is applicable in RC QPs only */ + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { @@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, sz = sizeof(struct qib_sge) * init_attr->cap.max_send_sge + sizeof(struct qib_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; @@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp); if (!qp->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct qib_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + - qp->r_rq.size * sz); + if (gfp != GFP_NOIO) + qp->r_rq.wq = vmalloc_user( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + else + qp->r_rq.wq = __vmalloc( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); diff --git a/kernel/drivers/infiniband/hw/qib/qib_qsfp.c b/kernel/drivers/infiniband/hw/qib/qib_qsfp.c index 5e27f7680..4c7c3c84a 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/kernel/drivers/infiniband/hw/qib/qib_qsfp.c @@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) qib_dev_porterr(ppd->dd, ppd->port, "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]); - if ((peek[2] & 2) == 0) { + if ((peek[2] & 4) == 0) { /* * If cable is paged, rather than "flat memory", we need to * set the page to zero, Even if it already appears to be zero. @@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len) sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n", QSFP_DATE_LEN, cd.date); sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n", - QSFP_LOT_LEN, cd.date); + QSFP_LOT_LEN, cd.lot); while (bidx < QSFP_DEFAULT_HDR_CNT) { int iidx; diff --git a/kernel/drivers/infiniband/hw/qib/qib_rc.c b/kernel/drivers/infiniband/hw/qib/qib_rc.c index 4544d6f88..e6b7556d5 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_rc.c +++ b/kernel/drivers/infiniband/hw/qib/qib_rc.c @@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; goto bail; } + ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; @@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE) qp->s_state = OP(RDMA_WRITE_ONLY); else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + ohdr->u.rc.imm_data = + wqe->rdma_wr.wr.ex.imm_data; hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) + if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; } bth2 |= IB_BTH_REQ_ACK; @@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_next_psn += (len - 1) / pmtu; wqe->lpsn = qp->s_next_psn++; } + ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_lsn++; wqe->lpsn = wqe->psn; } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); + wqe->atomic_wr.swap); ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); } else { qp->s_state = OP(FETCH_ADD); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); ohdr->u.atomic_eth.compare_data = 0; } ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); + wqe->atomic_wr.remote_addr >> 32); ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); + wqe->atomic_wr.remote_addr); ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); + wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; @@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp) */ len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu; ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + cpu_to_be64(wqe->rdma_wr.remote_addr + len); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); diff --git a/kernel/drivers/infiniband/hw/qib/qib_ruc.c b/kernel/drivers/infiniband/hw/qib/qib_ruc.c index f42bd0f47..b1aa21bdd 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_ruc.c +++ b/kernel/drivers/infiniband/hw/qib/qib_ruc.c @@ -32,6 +32,7 @@ */ #include <linux/spinlock.h> +#include <rdma/ib_smi.h> #include "qib.h" #include "qib_mad.h" @@ -458,8 +459,8 @@ again: if (wqe->length == 0) break; if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; qp->r_sge.sg_list = NULL; @@ -471,8 +472,8 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; release = 0; @@ -489,18 +490,18 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; + sdata = wqe->atomic_wr.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = - (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); + sdata, wqe->atomic_wr.swap); qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -784,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || diff --git a/kernel/drivers/infiniband/hw/qib/qib_uc.c b/kernel/drivers/infiniband/hw/qib/qib_uc.c index aa3a8035b..06a564589 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_uc.c +++ b/kernel/drivers/infiniband/hw/qib/qib_uc.c @@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp) case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / 4; if (len > pmtu) { diff --git a/kernel/drivers/infiniband/hw/qib/qib_ud.c b/kernel/drivers/infiniband/hw/qib/qib_ud.c index 26243b722..59193f67e 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_ud.c +++ b/kernel/drivers/infiniband/hw/qib/qib_ud.c @@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) u32 length; enum ib_qp_type sqptype, dqptype; - qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; @@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey; + qkey = (int)swqe->ud_wr.remote_qkey < 0 ? + sqp->qkey : swqe->ud_wr.remote_qkey; if (unlikely(qkey != qp->qkey)) { u16 lid; @@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? - swqe->wr.wr.ud.pkey_index : 0; + swqe->ud_wr.pkey_index : 0; wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); wc.sl = ah_attr->sl; wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); @@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) this_cpu_inc(ibp->pmastats->n_multicast_xmit); @@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp) bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? - wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); + wqe->ud_wr.pkey_index : qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp) ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && ah_attr->dlid != QIB_PERMISSIVE_LID ? cpu_to_be32(QIB_MULTICAST_QPN) : - cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: diff --git a/kernel/drivers/infiniband/hw/qib/qib_verbs.c b/kernel/drivers/infiniband/hw/qib/qib_verbs.c index 9dd5d9a05..de6cb6fcd 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_verbs.c +++ b/kernel/drivers/infiniband/hw/qib/qib_verbs.c @@ -362,8 +362,8 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, * undefined operations. * Make sure buffer is large enough to hold the result for atomics. */ - if (wr->opcode == IB_WR_FAST_REG_MR) { - if (qib_fast_reg_mr(qp, wr)) + if (wr->opcode == IB_WR_REG_MR) { + if (qib_reg_mr(qp, reg_wr(wr))) goto bail_inval; } else if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) @@ -374,7 +374,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, wr->opcode != IB_WR_SEND_WITH_IMM) goto bail_inval; /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) goto bail_inval; } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) goto bail_inval; @@ -397,7 +397,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, rkt = &to_idev(qp->ibqp.device)->lk_table; pd = to_ipd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_REG_MR) + memcpy(&wqe->reg_wr, reg_wr(wr), + sizeof(wqe->reg_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + wqe->length = 0; j = 0; if (wr->num_sge) { @@ -426,7 +442,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, qp->port_num - 1)->ibmtu) goto bail_inval_free; else - atomic_inc(&to_iah(wr->wr.ud.ah)->refcount); + atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount); wqe->ssn = qp->s_ssn++; qp->s_head = next; @@ -1551,12 +1567,14 @@ full: } } -static int qib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct qib_devdata *dd = dd_from_ibdev(ibdev); struct qib_ibdev *dev = to_idev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; memset(props, 0, sizeof(*props)); props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | @@ -1573,6 +1591,7 @@ static int qib_query_device(struct ib_device *ibdev, props->max_qp = ib_qib_max_qps; props->max_qp_wr = ib_qib_max_qp_wrs; props->max_sge = ib_qib_max_sges; + props->max_sge_rd = ib_qib_max_sges; props->max_cq = ib_qib_max_cqs; props->max_ah = ib_qib_max_ahs; props->max_cqe = ib_qib_max_cqes; @@ -2041,6 +2060,24 @@ static void init_ibport(struct qib_pportdata *ppd) RCU_INIT_POINTER(ibp->qp1, NULL); } +static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = qib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -2222,9 +2259,8 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->reg_phys_mr = qib_reg_phys_mr; ibdev->reg_user_mr = qib_reg_user_mr; ibdev->dereg_mr = qib_dereg_mr; - ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr; - ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; - ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list; + ibdev->alloc_mr = qib_alloc_mr; + ibdev->map_mr_sg = qib_map_mr_sg; ibdev->alloc_fmr = qib_alloc_fmr; ibdev->map_phys_fmr = qib_map_phys_fmr; ibdev->unmap_fmr = qib_unmap_fmr; @@ -2234,6 +2270,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->process_mad = qib_process_mad; ibdev->mmap = qib_mmap; ibdev->dma_ops = &qib_dma_mapping_ops; + ibdev->get_port_immutable = qib_port_immutable; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); diff --git a/kernel/drivers/infiniband/hw/qib/qib_verbs.h b/kernel/drivers/infiniband/hw/qib/qib_verbs.h index 44ca28c83..bc803f33d 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_verbs.h +++ b/kernel/drivers/infiniband/hw/qib/qib_verbs.h @@ -329,6 +329,8 @@ struct qib_sge { struct qib_mr { struct ib_mr ibmr; struct ib_umem *umem; + u64 *pages; + u32 npages; struct qib_mregion mr; /* must be last */ }; @@ -338,7 +340,13 @@ struct qib_mr { * in qp->s_max_sge. */ struct qib_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ @@ -874,8 +882,10 @@ void qib_cap_mask_chg(struct qib_ibport *ibp); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qib_create_agents(struct qib_ibdev *dev); void qib_free_agents(struct qib_ibdev *dev); @@ -1009,8 +1019,9 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, struct ib_ucontext *context, +struct ib_cq *qib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata); int qib_destroy_cq(struct ib_cq *ibcq); @@ -1031,14 +1042,15 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int qib_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); - -struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( - struct ib_device *ibdev, int page_list_len); +struct ib_mr *qib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_entries); -void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); +int qib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); +int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr); struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); diff --git a/kernel/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/kernel/drivers/infiniband/hw/qib/qib_verbs_mcast.c index f8ea069a3..b2fb5286d 100644 --- a/kernel/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/kernel/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; - struct qib_mcast_qp *p, *tmp; + struct qib_mcast_qp *p, *tmp, *delp = NULL; struct rb_node *n; int last = 0; int ret; - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; spin_lock_irq(&ibp->lock); @@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) while (1) { if (n == NULL) { spin_unlock_irq(&ibp->lock); - ret = -EINVAL; - goto bail; + return -EINVAL; } mcast = rb_entry(n, struct qib_mcast, rb_node); @@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) */ list_del_rcu(&p->list); mcast->n_attached--; + delp = p; /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { @@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(delp); - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - qib_mcast_qp_free(p); - } if (last) { atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); @@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) dev->n_mcast_grps_allocated--; spin_unlock_irq(&dev->n_mcast_grps_lock); } - - ret = 0; - -bail: - return ret; + return 0; } int qib_mcast_tree_empty(struct qib_ibport *ibp) diff --git a/kernel/drivers/infiniband/hw/usnic/usnic.h b/kernel/drivers/infiniband/hw/usnic/usnic.h index 5be13d899..f903502d3 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_abi.h b/kernel/drivers/infiniband/hw/usnic/usnic_abi.h index 04a662295..7fe9502ce 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_abi.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_abi.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/kernel/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h index 393567266..596e0ed49 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_common_util.h b/kernel/drivers/infiniband/hw/usnic/usnic_common_util.h index 9d737ed5e..b54986de5 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_common_util.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_common_util.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.c b/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.c index 5d1386016..5e55b8bc6 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.h b/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.h index 4087d24a8..98453e91d 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_debugfs.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_fwd.c b/kernel/drivers/infiniband/hw/usnic/usnic_fwd.c index e3c9bd9d3..3c37dd59c 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_fwd.h b/kernel/drivers/infiniband/hw/usnic/usnic_fwd.h index 93713a223..3a8add9dd 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib.h b/kernel/drivers/infiniband/hw/usnic/usnic_ib.h index e5a9297dd..525bf2726 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_main.c b/kernel/drivers/infiniband/hw/usnic/usnic_ib_main.c index 0d0f98695..565c881a4 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF @@ -300,6 +315,22 @@ static struct notifier_block usnic_ib_inetaddr_notifier = { }; /* End of inet section*/ +static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = usnic_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + return 0; +} + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -312,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev) netdev = pci_get_drvdata(dev); us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); - if (IS_ERR_OR_NULL(us_ibdev)) { + if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); + return ERR_PTR(-EFAULT); } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); - if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { - usnic_err("Failed to alloc ufdev for %s with err %ld\n", - pci_name(dev), PTR_ERR(us_ibdev->ufdev)); + if (!us_ibdev->ufdev) { + usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev)); goto err_dealloc; } @@ -383,6 +413,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; + us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; if (ib_register_device(&us_ibdev->ib_dev, NULL)) diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index db3588df3..fcea3a24d 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF @@ -221,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, /* Create Flow Handle */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); - if (IS_ERR_OR_NULL(qp_flow)) { - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; + if (!qp_flow) { + err = -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; @@ -296,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp, /* Create qp_flow */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); - if (IS_ERR_OR_NULL(qp_flow)) { - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; + if (!qp_flow) { + err = -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h index b0aafe8db..b1458be1d 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 27dc67c16..3412ea061 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h index 0d09b493c..3d98e16cf 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 53bd6a2d9..f8e321168 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF @@ -248,7 +263,8 @@ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, } int usnic_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); union ib_gid gid; @@ -257,6 +273,9 @@ int usnic_ib_query_device(struct ib_device *ibdev, int qp_per_vf; usnic_dbg("\n"); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + mutex_lock(&us_ibdev->usdev_lock); us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); @@ -570,13 +589,17 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return status; } -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata) +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) { struct ib_cq *cq; usnic_dbg("\n"); + if (attr->flags) + return ERR_PTR(-EINVAL); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-EBUSY); diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index bb864f5ae..414eaa566 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF @@ -24,9 +39,12 @@ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, u8 port_num); int usnic_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props); + struct ib_device_attr *props, + struct ib_udata *uhw); int usnic_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +enum rdma_protocol_type +usnic_ib_query_protocol(struct ib_device *device, u8 port_num); int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); @@ -44,9 +62,10 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, int usnic_ib_destroy_qp(struct ib_qp *qp); int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata); +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); int usnic_ib_destroy_cq(struct ib_cq *cq); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_log.h b/kernel/drivers/infiniband/hw/usnic/usnic_log.h index 75777a66c..183fcb6a9 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_log.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_log.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_transport.c b/kernel/drivers/infiniband/hw/usnic/usnic_transport.c index ddef6f77a..de318389a 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_transport.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_transport.h b/kernel/drivers/infiniband/hw/usnic/usnic_transport.h index 7e5dc6d9f..9a7a2d975 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_transport.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_transport.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_uiom.c b/kernel/drivers/infiniband/hw/usnic/usnic_uiom.c index 417de1f32..645a5f6e6 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -7,7 +7,7 @@ * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: + * BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following @@ -472,11 +472,10 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) return ERR_PTR(-ENOMEM); pd->domain = domain = iommu_domain_alloc(&pci_bus_type); - if (IS_ERR_OR_NULL(domain)) { - usnic_err("Failed to allocate IOMMU domain with err %ld\n", - PTR_ERR(pd->domain)); + if (!domain) { + usnic_err("Failed to allocate IOMMU domain"); kfree(pd); - return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM); + return ERR_PTR(-ENOMEM); } iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_uiom.h b/kernel/drivers/infiniband/hw/usnic/usnic_uiom.h index 70440996e..45ca7c161 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c index 3a4288e0f..42b4b4c4e 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h index d4f752e25..c0b0b876a 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_vnic.c b/kernel/drivers/infiniband/hw/usnic/usnic_vnic.c index 656b88c39..66de93fb8 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_vnic.c +++ b/kernel/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/hw/usnic/usnic_vnic.h b/kernel/drivers/infiniband/hw/usnic/usnic_vnic.h index 14d931a88..a08423e47 100644 --- a/kernel/drivers/infiniband/hw/usnic/usnic_vnic.h +++ b/kernel/drivers/infiniband/hw/usnic/usnic_vnic.h @@ -1,9 +1,24 @@ /* * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib.h b/kernel/drivers/infiniband/ulp/ipoib/ipoib.h index bd94b0a6e..3ede10309 100644 --- a/kernel/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib.h @@ -80,7 +80,7 @@ enum { IPOIB_NUM_WC = 4, IPOIB_MAX_PATH_REC_QUEUE = 3, - IPOIB_MAX_MCAST_QUEUE = 3, + IPOIB_MAX_MCAST_QUEUE = 64, IPOIB_FLAG_OPER_UP = 0, IPOIB_FLAG_INITIALIZED = 1, @@ -239,7 +239,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_cm_tx_buf *tx_ring; + struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; @@ -342,7 +342,6 @@ struct ipoib_dev_priv { u16 pkey; u16 pkey_index; struct ib_pd *pd; - struct ib_mr *mr; struct ib_cq *recv_cq; struct ib_cq *send_cq; struct ib_qp *qp; @@ -361,7 +360,7 @@ struct ipoib_dev_priv { unsigned tx_head; unsigned tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; - struct ib_send_wr tx_wr; + struct ib_ud_wr tx_wr; unsigned tx_outstanding; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -496,6 +495,7 @@ void ipoib_dev_cleanup(struct net_device *dev); void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); +void ipoib_mcast_free(struct ipoib_mcast *mc); void ipoib_mcast_restart_task(struct work_struct *work); int ipoib_mcast_start_thread(struct net_device *dev); @@ -504,6 +504,33 @@ int ipoib_mcast_stop_thread(struct net_device *dev); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req); + +static inline void ipoib_build_sge(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) +{ + int i, off; + struct sk_buff *skb = tx_req->skb; + skb_frag_t *frags = skb_shinfo(skb)->frags; + int nr_frags = skb_shinfo(skb)->nr_frags; + u64 *mapping = tx_req->mapping; + + if (skb_headlen(skb)) { + priv->tx_sge[0].addr = mapping[0]; + priv->tx_sge[0].length = skb_headlen(skb); + off = 1; + } else + off = 0; + + for (i = 0; i < nr_frags; ++i) { + priv->tx_sge[i + off].addr = mapping[i + off]; + priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); + } + priv->tx_wr.wr.num_sge = nr_frags + off; +} + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); @@ -522,6 +549,8 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey); +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast); +struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c index cf32a778e..3ae9726ef 100644 --- a/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -332,7 +332,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, int i; for (i = 0; i < priv->cm.num_frags; ++i) - sge[i].lkey = priv->mr->lkey; + sge[i].lkey = priv->pd->local_dma_lkey; sge[0].length = IPOIB_CM_HEAD_SIZE; for (i = 1; i < priv->cm.num_frags; ++i) @@ -694,24 +694,21 @@ repost: static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_cm_tx *tx, unsigned int wr_id, - u64 addr, int len) + struct ipoib_tx_buf *tx_req) { struct ib_send_wr *bad_wr; - priv->tx_sge[0].addr = addr; - priv->tx_sge[0].length = len; + ipoib_build_sge(priv, tx_req); - priv->tx_wr.num_sge = 1; - priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; + priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; - return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); + return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr); } void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_cm_tx_buf *tx_req; - u64 addr; + struct ipoib_tx_buf *tx_req; int rc; if (unlikely(skb->len > tx->mtu)) { @@ -735,24 +732,21 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ */ tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; tx_req->skb = skb; - addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + + if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { ++dev->stats.tx_errors; dev_kfree_skb_any(skb); return; } - tx_req->mapping = addr; - skb_orphan(skb); skb_dst_drop(skb); - rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len); + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; - ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); } else { dev->trans_start = jiffies; @@ -777,7 +771,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -791,7 +785,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -854,7 +848,7 @@ int ipoib_cm_dev_open(struct net_device *dev) } ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), - 0, NULL); + 0); if (ret) { printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, IPOIB_CM_IETF_ID | priv->qp->qp_num); @@ -1036,6 +1030,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; + if (dev->features & NETIF_F_SG) + attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", @@ -1170,7 +1167,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1197,8 +1194,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, - DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; netif_tx_lock_bh(p->dev); @@ -1455,7 +1451,6 @@ static void ipoib_cm_stale_task(struct work_struct *work) spin_unlock_irq(&priv->lock); } - static ssize_t show_mode(struct device *d, struct device_attribute *attr, char *buf) { diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 63b92cbb2..5ea0c1407 100644 --- a/kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -263,8 +263,7 @@ repost: "for buf %d\n", wr_id); } -static int ipoib_dma_map_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -305,8 +304,8 @@ partial_error: return -EIO; } -static void ipoib_dma_unmap_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -314,7 +313,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, int off; if (skb_headlen(skb)) { - ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb), + DMA_TO_DEVICE); off = 1; } else off = 0; @@ -322,8 +322,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag), - DMA_TO_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + off], + skb_frag_size(frag), DMA_TO_DEVICE); } } @@ -389,7 +389,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &priv->tx_ring[wr_id]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); ++dev->stats.tx_packets; dev->stats.tx_bytes += tx_req->skb->len; @@ -514,37 +514,23 @@ static inline int post_send(struct ipoib_dev_priv *priv, void *head, int hlen) { struct ib_send_wr *bad_wr; - int i, off; struct sk_buff *skb = tx_req->skb; - skb_frag_t *frags = skb_shinfo(skb)->frags; - int nr_frags = skb_shinfo(skb)->nr_frags; - u64 *mapping = tx_req->mapping; - if (skb_headlen(skb)) { - priv->tx_sge[0].addr = mapping[0]; - priv->tx_sge[0].length = skb_headlen(skb); - off = 1; - } else - off = 0; + ipoib_build_sge(priv, tx_req); - for (i = 0; i < nr_frags; ++i) { - priv->tx_sge[i + off].addr = mapping[i + off]; - priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); - } - priv->tx_wr.num_sge = nr_frags + off; - priv->tx_wr.wr_id = wr_id; - priv->tx_wr.wr.ud.remote_qpn = qpn; - priv->tx_wr.wr.ud.ah = address; + priv->tx_wr.wr.wr_id = wr_id; + priv->tx_wr.remote_qpn = qpn; + priv->tx_wr.ah = address; if (head) { - priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size; - priv->tx_wr.wr.ud.header = head; - priv->tx_wr.wr.ud.hlen = hlen; - priv->tx_wr.opcode = IB_WR_LSO; + priv->tx_wr.mss = skb_shinfo(skb)->gso_size; + priv->tx_wr.header = head; + priv->tx_wr.hlen = hlen; + priv->tx_wr.wr.opcode = IB_WR_LSO; } else - priv->tx_wr.opcode = IB_WR_SEND; + priv->tx_wr.wr.opcode = IB_WR_SEND; - return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); + return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); } void ipoib_send(struct net_device *dev, struct sk_buff *skb, @@ -597,9 +583,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, } if (skb->ip_summed == CHECKSUM_PARTIAL) - priv->tx_wr.send_flags |= IB_SEND_IP_CSUM; + priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM; else - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); @@ -617,7 +603,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; --priv->tx_outstanding; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) netif_wake_queue(dev); @@ -868,7 +854,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) while ((int) priv->tx_tail - (int) priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; --priv->tx_outstanding; @@ -985,20 +971,21 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) } static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, - enum ipoib_flush_level level) + enum ipoib_flush_level level, + int nesting) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; int result; - down_read(&priv->vlan_rwsem); + down_read_nested(&priv->vlan_rwsem, nesting); /* * Flush any child interfaces too -- they might be up even if * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, level); + __ipoib_ib_dev_flush(cpriv, level, nesting + 1); up_read(&priv->vlan_rwsem); @@ -1076,7 +1063,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_light); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0); } void ipoib_ib_dev_flush_normal(struct work_struct *work) @@ -1084,7 +1071,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_normal); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0); } void ipoib_ib_dev_flush_heavy(struct work_struct *work) @@ -1092,7 +1079,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); } void ipoib_ib_dev_cleanup(struct net_device *dev) diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c b/kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9e1b203d7..7d3281866 100644 --- a/kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -48,6 +48,9 @@ #include <linux/jhash.h> #include <net/arp.h> +#include <net/addrconf.h> +#include <linux/inetdevice.h> +#include <rdma/ib_cache.h> #define DRV_VERSION "1.0.0" @@ -89,13 +92,18 @@ struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); -static void ipoib_remove_one(struct ib_device *device); +static void ipoib_remove_one(struct ib_device *device, void *client_data); static void ipoib_neigh_reclaim(struct rcu_head *rp); +static struct net_device *ipoib_get_net_dev_by_params( + struct ib_device *dev, u8 port, u16 pkey, + const union ib_gid *gid, const struct sockaddr *addr, + void *client_data); static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, - .remove = ipoib_remove_one + .remove = ipoib_remove_one, + .get_net_dev_by_params = ipoib_get_net_dev_by_params, }; int ipoib_open(struct net_device *dev) @@ -190,7 +198,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu struct ipoib_dev_priv *priv = netdev_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO); + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); return features; } @@ -222,6 +230,225 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/* Called with an RCU read lock taken */ +static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, + struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct in_device *in_dev; + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr; + __be32 ret_addr; + + switch (addr->sa_family) { + case AF_INET: + in_dev = in_dev_get(dev); + if (!in_dev) + return false; + + ret_addr = inet_confirm_addr(net, in_dev, 0, + addr_in->sin_addr.s_addr, + RT_SCOPE_HOST); + in_dev_put(in_dev); + if (ret_addr) + return true; + + break; + case AF_INET6: + if (IS_ENABLED(CONFIG_IPV6) && + ipv6_chk_addr(net, &addr_in6->sin6_addr, dev, 1)) + return true; + + break; + } + return false; +} + +/** + * Find the master net_device on top of the given net_device. + * @dev: base IPoIB net_device + * + * Returns the master net_device with a reference held, or the same net_device + * if no master exists. + */ +static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) +{ + struct net_device *master; + + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + if (master) + dev_hold(master); + rcu_read_unlock(); + + if (master) + return master; + + dev_hold(dev); + return dev; +} + +/** + * Find a net_device matching the given address, which is an upper device of + * the given net_device. + * @addr: IP address to look for. + * @dev: base IPoIB net_device + * + * If found, returns the net_device with a reference held. Otherwise return + * NULL. + */ +static struct net_device *ipoib_get_net_dev_match_addr( + const struct sockaddr *addr, struct net_device *dev) +{ + struct net_device *upper, + *result = NULL; + struct list_head *iter; + + rcu_read_lock(); + if (ipoib_is_dev_match_addr_rcu(addr, dev)) { + dev_hold(dev); + result = dev; + goto out; + } + + netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { + if (ipoib_is_dev_match_addr_rcu(addr, upper)) { + dev_hold(upper); + result = upper; + break; + } + } +out: + rcu_read_unlock(); + return result; +} + +/* returns the number of IPoIB netdevs on top a given ipoib device matching a + * pkey_index and address, if one exists. + * + * @found_net_dev: contains a matching net_device if the return value >= 1, + * with a reference held. */ +static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, + const union ib_gid *gid, + u16 pkey_index, + const struct sockaddr *addr, + int nesting, + struct net_device **found_net_dev) +{ + struct ipoib_dev_priv *child_priv; + struct net_device *net_dev = NULL; + int matches = 0; + + if (priv->pkey_index == pkey_index && + (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { + if (!addr) { + net_dev = ipoib_get_master_net_dev(priv->dev); + } else { + /* Verify the net_device matches the IP address, as + * IPoIB child devices currently share a GID. */ + net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev); + } + if (net_dev) { + if (!*found_net_dev) + *found_net_dev = net_dev; + else + dev_put(net_dev); + ++matches; + } + } + + /* Check child interfaces */ + down_read_nested(&priv->vlan_rwsem, nesting); + list_for_each_entry(child_priv, &priv->child_intfs, list) { + matches += ipoib_match_gid_pkey_addr(child_priv, gid, + pkey_index, addr, + nesting + 1, + found_net_dev); + if (matches > 1) + break; + } + up_read(&priv->vlan_rwsem); + + return matches; +} + +/* Returns the number of matching net_devs found (between 0 and 2). Also + * return the matching net_device in the @net_dev parameter, holding a + * reference to the net_device, if the number of matches >= 1 */ +static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port, + u16 pkey_index, + const union ib_gid *gid, + const struct sockaddr *addr, + struct net_device **net_dev) +{ + struct ipoib_dev_priv *priv; + int matches = 0; + + *net_dev = NULL; + + list_for_each_entry(priv, dev_list, list) { + if (priv->port != port) + continue; + + matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index, + addr, 0, net_dev); + if (matches > 1) + break; + } + + return matches; +} + +static struct net_device *ipoib_get_net_dev_by_params( + struct ib_device *dev, u8 port, u16 pkey, + const union ib_gid *gid, const struct sockaddr *addr, + void *client_data) +{ + struct net_device *net_dev; + struct list_head *dev_list = client_data; + u16 pkey_index; + int matches; + int ret; + + if (!rdma_protocol_ib(dev, port)) + return NULL; + + ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); + if (ret) + return NULL; + + if (!dev_list) + return NULL; + + /* See if we can find a unique device matching the L2 parameters */ + matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, + gid, NULL, &net_dev); + + switch (matches) { + case 0: + return NULL; + case 1: + return net_dev; + } + + dev_put(net_dev); + + /* Couldn't find a unique device with L2 parameters only. Use L3 + * address to uniquely match the net device */ + matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, + gid, addr, &net_dev); + switch (matches) { + case 0: + return NULL; + default: + dev_warn_ratelimited(&dev->dev, + "duplicate IP address detected\n"); + /* Fall through */ + case 1: + return net_dev; + } +} + int ipoib_set_mode(struct net_device *dev, const char *buf) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -232,8 +459,9 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); netdev_update_features(dev); + dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); rtnl_unlock(); - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); rtnl_lock(); @@ -921,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) unsigned long dt; unsigned long flags; int i; + LIST_HEAD(remove_list); + struct ipoib_mcast *mcast, *tmcast; + struct net_device *dev = priv->dev; if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; @@ -948,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) lockdep_is_held(&priv->lock))) != NULL) { /* was the neigh idle for two GC periods */ if (time_after(neigh_obsolete, neigh->alive)) { + u8 *mgid = neigh->daddr + 4; + + /* Is this multicast ? */ + if (*mgid == 0xff) { + mcast = __ipoib_mcast_find(dev, mgid); + + if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { + list_del(&mcast->list); + rb_erase(&mcast->rb_node, &priv->multicast_tree); + list_add_tail(&mcast->list, &remove_list); + } + } + rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); @@ -963,6 +1207,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) out_unlock: spin_unlock_irqrestore(&priv->lock, flags); + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { + ipoib_mcast_leave(dev, mcast); + ipoib_mcast_free(mcast); + } } static void ipoib_reap_neigh(struct work_struct *work) @@ -1128,7 +1376,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) { struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; - struct ipoib_neigh **buckets; + struct ipoib_neigh __rcu **buckets; u32 size; clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); @@ -1146,7 +1394,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl->size = size; htbl->mask = (size - 1); htbl->buckets = buckets; - ntbl->htbl = htbl; + RCU_INIT_POINTER(ntbl->htbl, htbl); htbl->ntbl = ntbl; atomic_set(&ntbl->entries, 0); @@ -1577,7 +1825,8 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); priv->dev->dev_id = port - 1; - if (!ib_query_port(hca, port, &attr)) + result = ib_query_port(hca, port, &attr); + if (!result) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", @@ -1598,7 +1847,8 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } - if (ipoib_set_dev_features(priv, hca)) + result = ipoib_set_dev_features(priv, hca); + if (result) goto device_init_failed; /* @@ -1610,7 +1860,7 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; - result = ib_query_gid(hca, port, 0, &priv->local_gid); + result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); @@ -1684,10 +1934,8 @@ static void ipoib_add_one(struct ib_device *device) struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; - int s, e, p; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int p; + int count = 0; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) @@ -1695,36 +1943,30 @@ static void ipoib_add_one(struct ib_device *device) INIT_LIST_HEAD(dev_list); - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { - if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { + if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); if (!IS_ERR(dev)) { priv = netdev_priv(dev); list_add_tail(&priv->list, dev_list); + count++; } } + if (!count) { + kfree(dev_list); + return; + } + ib_set_client_data(device, &ipoib_client, dev_list); } -static void ipoib_remove_one(struct ib_device *device) +static void ipoib_remove_one(struct ib_device *device, void *client_data) { struct ipoib_dev_priv *priv, *tmp; - struct list_head *dev_list; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + struct list_head *dev_list = client_data; - dev_list = ib_get_client_data(device, &ipoib_client); if (!dev_list) return; diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 140c94ce7..6dcef673d 100644 --- a/kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv, queue_delayed_work(priv->wq, &priv->mcast_task, 0); } -static void ipoib_mcast_free(struct ipoib_mcast *mcast) +void ipoib_mcast_free(struct ipoib_mcast *mcast) { struct net_device *dev = mcast->dev; int tx_dropped = 0; @@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, return mcast; } -static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) +struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->multicast_tree.rb_node; @@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); - priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + priv->tx_wr.remote_qkey = priv->qkey; set_qkey = 1; } @@ -393,8 +393,13 @@ static int ipoib_mcast_join_complete(int status, goto out_locked; } } else { - if (mcast->logcount++ < 20) { - if (status == -ETIMEDOUT || status == -EAGAIN) { + bool silent_fail = + test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && + status == -EINVAL; + + if (mcast->logcount < 20) { + if (status == -ETIMEDOUT || status == -EAGAIN || + silent_fail) { ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n", test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", mcast->mcmember.mgid.raw, status); @@ -403,6 +408,9 @@ static int ipoib_mcast_join_complete(int status, test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", mcast->mcmember.mgid.raw, status); } + + if (!silent_fail) + mcast->logcount++; } if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && @@ -448,8 +456,7 @@ out_locked: return status; } -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, - int create) +static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_sa_multicast *multicast; @@ -471,7 +478,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE; - if (create) { + if (mcast != priv->broadcast) { + /* + * RFC 4391: + * The MGID MUST use the same P_Key, Q_Key, SL, MTU, + * and HopLimit as those used in the broadcast-GID. The rest + * of attributes SHOULD follow the values used in the + * broadcast-GID as well. + */ comp_mask |= IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_MTU_SELECTOR | @@ -492,6 +506,24 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, rec.sl = priv->broadcast->mcmember.sl; rec.flow_label = priv->broadcast->mcmember.flow_label; rec.hop_limit = priv->broadcast->mcmember.hop_limit; + + /* + * Send-only IB Multicast joins do not work at the core + * IB layer yet, so we can't use them here. However, + * we are emulating an Ethernet multicast send, which + * does not require a multicast subscription and will + * still send properly. The most appropriate thing to + * do is to create the group if it doesn't exist as that + * most closely emulates the behavior, from a user space + * application perspecitive, of Ethernet multicast + * operation. For now, we do a full join, maybe later + * when the core IB layers support send only joins we + * will use them. + */ +#if 0 + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) + rec.join_state = 4; +#endif } multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, @@ -517,7 +549,6 @@ void ipoib_mcast_join_task(struct work_struct *work) struct ib_port_attr port_attr; unsigned long delay_until = 0; struct ipoib_mcast *mcast = NULL; - int create = 1; if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; @@ -530,7 +561,7 @@ void ipoib_mcast_join_task(struct work_struct *work) } priv->local_lid = port_attr.lid; - if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) + if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) ipoib_warn(priv, "ib_query_gid() failed\n"); else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); @@ -566,7 +597,6 @@ void ipoib_mcast_join_task(struct work_struct *work) if (IS_ERR_OR_NULL(priv->broadcast->mc) && !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) { mcast = priv->broadcast; - create = 0; if (mcast->backoff > 1 && time_before(jiffies, mcast->delay_until)) { delay_until = mcast->delay_until; @@ -590,12 +620,8 @@ void ipoib_mcast_join_task(struct work_struct *work) /* Found the next unjoined group */ init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - create = 0; - else - create = 1; spin_unlock_irq(&priv->lock); - ipoib_mcast_join(dev, mcast, create); + ipoib_mcast_join(dev, mcast); spin_lock_irq(&priv->lock); } else if (!delay_until || time_before(mcast->delay_until, delay_until)) @@ -618,7 +644,7 @@ out: } spin_unlock_irq(&priv->lock); if (mcast) - ipoib_mcast_join(dev, mcast, create); + ipoib_mcast_join(dev, mcast); } int ipoib_mcast_start_thread(struct net_device *dev) @@ -651,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev) return 0; } -static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret = 0; diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 2d13fd08c..d48c5bae7 100644 --- a/kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -141,6 +141,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; + struct ib_cq_init_attr cq_attr = {}; int ret, size; int i; @@ -151,12 +152,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return -ENODEV; } - priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(priv->mr)) { - printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); - goto out_free_pd; - } - /* * the various IPoIB tasks assume they will never race against * themselves, so always use a single thread workqueue @@ -164,7 +159,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->wq = create_singlethread_workqueue("ipoib_wq"); if (!priv->wq) { printk(KERN_WARNING "ipoib: failed to allocate device WQ\n"); - goto out_free_mr; + goto out_free_pd; } size = ipoib_recvq_size + 1; @@ -179,14 +174,17 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -ENOSYS) goto out_free_wq; - priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); + cq_attr.cqe = size; + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, + dev, &cq_attr); if (IS_ERR(priv->recv_cq)) { printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_cm_dev_cleanup; } + cq_attr.cqe = ipoib_sendq_size; priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, - dev, ipoib_sendq_size, 0); + dev, &cq_attr); if (IS_ERR(priv->send_cq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_recv_cq; @@ -221,13 +219,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff; for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) - priv->tx_sge[i].lkey = priv->mr->lkey; + priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; - priv->tx_wr.opcode = IB_WR_SEND; - priv->tx_wr.sg_list = priv->tx_sge; - priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->tx_wr.wr.opcode = IB_WR_SEND; + priv->tx_wr.wr.sg_list = priv->tx_sge; + priv->tx_wr.wr.send_flags = IB_SEND_SIGNALED; - priv->rx_sge[0].lkey = priv->mr->lkey; + priv->rx_sge[0].lkey = priv->pd->local_dma_lkey; priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); priv->rx_wr.num_sge = 1; @@ -250,9 +248,6 @@ out_free_wq: destroy_workqueue(priv->wq); priv->wq = NULL; -out_free_mr: - ib_dereg_mr(priv->mr); - out_free_pd: ib_dealloc_pd(priv->pd); @@ -285,12 +280,7 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) priv->wq = NULL; } - if (ib_dereg_mr(priv->mr)) - ipoib_warn(priv, "ib_dereg_mr failed\n"); - - if (ib_dealloc_pd(priv->pd)) - ipoib_warn(priv, "ib_dealloc_pd failed\n"); - + ib_dealloc_pd(priv->pd); } void ipoib_event(struct ib_event_handler *handler, diff --git a/kernel/drivers/infiniband/ulp/iser/iscsi_iser.c b/kernel/drivers/infiniband/ulp/iser/iscsi_iser.c index c933d882c..9080161e0 100644 --- a/kernel/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/kernel/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -74,36 +74,44 @@ #include "iscsi_iser.h" +MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); +MODULE_VERSION(DRV_VER); + static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; static struct scsi_transport_template *iscsi_iser_scsi_transport; +static struct workqueue_struct *release_wq; +struct iser_global ig; + +int iser_debug_level = 0; +module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); +MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512"); -int iser_debug_level = 0; -bool iser_pi_enable = false; -int iser_pi_guard = 1; +unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; +module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024"); -MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); -MODULE_VERSION(DRV_VER); - -module_param_named(debug_level, iser_debug_level, int, 0644); -MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +bool iser_always_reg = true; +module_param_named(always_register, iser_always_reg, bool, S_IRUGO); +MODULE_PARM_DESC(always_register, + "Always register memory, even for continuous memory regions (default:true)"); -module_param_named(pi_enable, iser_pi_enable, bool, 0644); +bool iser_pi_enable = false; +module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); -module_param_named(pi_guard, iser_pi_guard, int, 0644); +int iser_pi_guard; +module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO); MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); -static struct workqueue_struct *release_wq; -struct iser_global ig; - /* - * iscsi_iser_recv() - Process a successfull recv completion + * iscsi_iser_recv() - Process a successful recv completion * @conn: iscsi connection * @hdr: iscsi header * @rx_data: buffer containing receive data payload @@ -118,7 +126,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, { int rc = 0; int datalen; - int ahslen; /* verify PDU length */ datalen = ntoh24(hdr->dlength); @@ -133,9 +140,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, iser_dbg("aligned datalen (%d) hdr, %d (IB)\n", datalen, rx_data_len); - /* read AHS */ - ahslen = hdr->hlength * 4; - rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) goto error; @@ -201,11 +205,12 @@ iser_initialize_task_headers(struct iscsi_task *task, goto out; } + tx_desc->wr_idx = 0; tx_desc->mapped = true; tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; - tx_desc->tx_sg[0].lkey = device->mr->lkey; + tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; iser_task->iser_conn = iser_conn; out: @@ -626,6 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, if (ep) { iser_conn = ep->dd_data; max_cmds = iser_conn->max_cmds; + shost->sg_tablesize = iser_conn->scsi_sg_tablesize; + shost->max_sectors = iser_conn->scsi_max_sectors; mutex_lock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { @@ -644,6 +651,15 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, SHOST_DIX_GUARD_CRC); } + /* + * Limit the sg_tablesize and max_sectors based on the device + * max fastreg page list length. + */ + shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, + ib_conn->device->dev_attr.max_fast_reg_page_list_len); + shost->max_sectors = min_t(unsigned int, + 1024, (shost->sg_tablesize * PAGE_SIZE) >> 9); + if (iscsi_host_add(shost, ib_conn->device->ib_device->dma_device)) { mutex_unlock(&iser_conn->state_mutex); @@ -746,15 +762,7 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; - stats->custom_length = 4; - strcpy(stats->custom[0].desc, "qp_tx_queue_full"); - stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */ - strcpy(stats->custom[1].desc, "fmr_map_not_avail"); - stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */; - strcpy(stats->custom[2].desc, "eh_abort_cnt"); - stats->custom[2].value = conn->eh_abort_cnt; - strcpy(stats->custom[3].desc, "fmr_unalign_cnt"); - stats->custom[3].value = conn->fmr_unalign_cnt; + stats->custom_length = 0; } static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, @@ -843,10 +851,9 @@ failure: static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { - struct iser_conn *iser_conn; + struct iser_conn *iser_conn = ep->dd_data; int rc; - iser_conn = ep->dd_data; rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion, msecs_to_jiffies(timeout_ms)); /* if conn establishment failed, return error code to iscsi */ @@ -858,7 +865,7 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) mutex_unlock(&iser_conn->state_mutex); } - iser_info("ib conn %p rc = %d\n", iser_conn, rc); + iser_info("iser conn %p rc = %d\n", iser_conn, rc); if (rc > 0) return 1; /* success, this is the equivalent of POLLOUT */ @@ -880,11 +887,9 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { - struct iser_conn *iser_conn; + struct iser_conn *iser_conn = ep->dd_data; - iser_conn = ep->dd_data; - iser_info("ep %p iser conn %p state %d\n", - ep, iser_conn, iser_conn->state); + iser_info("ep %p iser conn %p\n", ep, iser_conn); mutex_lock(&iser_conn->state_mutex); iser_conn_terminate(iser_conn); @@ -904,6 +909,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) mutex_unlock(&iser_conn->state_mutex); iser_conn_release(iser_conn); } + iscsi_destroy_endpoint(ep); } @@ -961,19 +967,27 @@ static umode_t iser_attr_is_visible(int param_type, int param) return 0; } +static int iscsi_iser_slave_alloc(struct scsi_device *sdev) +{ + blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + + return 0; +} + static struct scsi_host_template iscsi_iser_sht = { .module = THIS_MODULE, .name = "iSCSI Initiator over iSER", .queuecommand = iscsi_queuecommand, .change_queue_depth = scsi_change_queue_depth, - .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, - .max_sectors = 1024, + .sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE, + .max_sectors = ISER_DEF_MAX_SECTORS, .cmd_per_lun = ISER_DEF_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .use_clustering = DISABLE_CLUSTERING, + .use_clustering = ENABLE_CLUSTERING, + .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, .track_queue_depth = 1, @@ -1078,7 +1092,7 @@ static void __exit iser_exit(void) if (!connlist_empty) { iser_err("Error cleanup stage completed but we still have iser " - "connections, destroying them anyway.\n"); + "connections, destroying them anyway\n"); list_for_each_entry_safe(iser_conn, n, &ig.connlist, conn_list) { iser_conn_release(iser_conn); diff --git a/kernel/drivers/infiniband/ulp/iser/iscsi_iser.h b/kernel/drivers/infiniband/ulp/iser/iscsi_iser.h index d2b6caf76..8a5998e6a 100644 --- a/kernel/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/kernel/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -98,8 +98,13 @@ #define SHIFT_4K 12 #define SIZE_4K (1ULL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) - /* support up to 512KB in one RDMA */ -#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) + +/* Default support is 512KB I/O size */ +#define ISER_DEF_MAX_SECTORS 1024 +#define ISCSI_ISER_DEF_SG_TABLESIZE ((ISER_DEF_MAX_SECTORS * 512) >> SHIFT_4K) +/* Maximum support is 8MB I/O size */ +#define ISCSI_ISER_MAX_SG_TABLESIZE ((16384 * 512) >> SHIFT_4K) + #define ISER_DEF_XMIT_CMDS_DEFAULT 512 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX @@ -222,23 +227,19 @@ enum iser_data_dir { * @size: num entries of this sg * @data_len: total beffer byte len * @dma_nents: returned by dma_map_sg - * @orig_sg: pointer to the original sg list (in case - * we used a copy) - * @orig_size: num entris of orig sg list */ struct iser_data_buf { struct scatterlist *sg; - unsigned int size; + int size; unsigned long data_len; unsigned int dma_nents; - struct scatterlist *orig_sg; - unsigned int orig_size; - }; +}; /* fwd declarations */ struct iser_device; struct iscsi_iser_task; struct iscsi_endpoint; +struct iser_reg_resources; /** * struct iser_mem_reg - iSER memory registration info @@ -259,6 +260,14 @@ enum iser_desc_type { ISCSI_TX_DATAOUT }; +/* Maximum number of work requests per task: + * Data memory region local invalidate + fast registration + * Protection memory region local invalidate + fast registration + * Signature memory region local invalidate + fast registration + * PDU send + */ +#define ISER_MAX_WRS 7 + /** * struct iser_tx_desc - iSER TX descriptor (for send wr_id) * @@ -271,6 +280,11 @@ enum iser_desc_type { * unsolicited data-out or control * @num_sge: number sges used on this TX task * @mapped: Is the task header mapped + * @wr_idx: Current WR index + * @wrs: Array of WRs per task + * @data_reg: Data buffer registration details + * @prot_reg: Protection buffer registration details + * @sig_attrs: Signature attributes */ struct iser_tx_desc { struct iser_hdr iser_header; @@ -280,6 +294,15 @@ struct iser_tx_desc { struct ib_sge tx_sg[2]; int num_sge; bool mapped; + u8 wr_idx; + union iser_wr { + struct ib_send_wr send; + struct ib_reg_wr fast_reg; + struct ib_sig_handover_wr sig; + } wrs[ISER_MAX_WRS]; + struct iser_mem_reg data_reg; + struct iser_mem_reg prot_reg; + struct ib_sig_attrs sig_attrs; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ @@ -326,6 +349,33 @@ struct iser_comp { }; /** + * struct iser_device - Memory registration operations + * per-device registration schemes + * + * @alloc_reg_res: Allocate registration resources + * @free_reg_res: Free registration resources + * @fast_reg_mem: Register memory buffers + * @unreg_mem: Un-register memory buffers + * @reg_desc_get: Get a registration descriptor for pool + * @reg_desc_put: Get a registration descriptor to pool + */ +struct iser_reg_ops { + int (*alloc_reg_res)(struct ib_conn *ib_conn, + unsigned cmds_max, + unsigned int size); + void (*free_reg_res)(struct ib_conn *ib_conn); + int (*reg_mem)(struct iscsi_iser_task *iser_task, + struct iser_data_buf *mem, + struct iser_reg_resources *rsc, + struct iser_mem_reg *reg); + void (*unreg_mem)(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); + struct iser_fr_desc * (*reg_desc_get)(struct ib_conn *ib_conn); + void (*reg_desc_put)(struct ib_conn *ib_conn, + struct iser_fr_desc *desc); +}; + +/** * struct iser_device - iSER device handle * * @ib_device: RDMA device @@ -338,11 +388,7 @@ struct iser_comp { * @comps_used: Number of completion contexts used, Min between online * cpus and device max completion vectors * @comps: Dinamically allocated array of completion handlers - * Memory registration pool Function pointers (FMR or Fastreg): - * @iser_alloc_rdma_reg_res: Allocation of memory regions pool - * @iser_free_rdma_reg_res: Free of memory regions pool - * @iser_reg_rdma_mem: Memory registration routine - * @iser_unreg_rdma_mem: Memory deregistration routine + * @reg_ops: Registration ops */ struct iser_device { struct ib_device *ib_device; @@ -354,54 +400,69 @@ struct iser_device { int refcount; int comps_used; struct iser_comp *comps; - int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn, - unsigned cmds_max); - void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn); - int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); - void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); + struct iser_reg_ops *reg_ops; }; #define ISER_CHECK_GUARD 0xc0 #define ISER_CHECK_REFTAG 0x0f #define ISER_CHECK_APPTAG 0x30 -enum iser_reg_indicator { - ISER_DATA_KEY_VALID = 1 << 0, - ISER_PROT_KEY_VALID = 1 << 1, - ISER_SIG_KEY_VALID = 1 << 2, - ISER_FASTREG_PROTECTED = 1 << 3, +/** + * struct iser_reg_resources - Fast registration recources + * + * @mr: memory region + * @fmr_pool: pool of fmrs + * @page_vec: fast reg page list used by fmr pool + * @mr_valid: is mr valid indicator + */ +struct iser_reg_resources { + union { + struct ib_mr *mr; + struct ib_fmr_pool *fmr_pool; + }; + struct iser_page_vec *page_vec; + u8 mr_valid:1; }; /** * struct iser_pi_context - Protection information context * - * @prot_mr: protection memory region - * @prot_frpl: protection fastreg page list - * @sig_mr: signature feature enabled memory region + * @rsc: protection buffer registration resources + * @sig_mr: signature enable memory region + * @sig_mr_valid: is sig_mr valid indicator + * @sig_protected: is region protected indicator */ struct iser_pi_context { - struct ib_mr *prot_mr; - struct ib_fast_reg_page_list *prot_frpl; + struct iser_reg_resources rsc; struct ib_mr *sig_mr; + u8 sig_mr_valid:1; + u8 sig_protected:1; }; /** - * struct fast_reg_descriptor - Fast registration descriptor + * struct iser_fr_desc - Fast registration descriptor * * @list: entry in connection fastreg pool - * @data_mr: data memory region - * @data_frpl: data fastreg page list + * @rsc: data buffer registration resources * @pi_ctx: protection information context - * @reg_indicators: fast registration indicators */ -struct fast_reg_descriptor { +struct iser_fr_desc { struct list_head list; - struct ib_mr *data_mr; - struct ib_fast_reg_page_list *data_frpl; + struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; - u8 reg_indicators; +}; + +/** + * struct iser_fr_pool: connection fast registration pool + * + * @list: list of fastreg descriptors + * @lock: protects fmr/fastreg pool + * @size: size of the pool + */ +struct iser_fr_pool { + struct list_head list; + spinlock_t lock; + int size; }; /** @@ -417,15 +478,7 @@ struct fast_reg_descriptor { * @pi_support: Indicate device T10-PI support * @beacon: beacon send wr to signal all flush errors were drained * @flush_comp: completes when all connection completions consumed - * @lock: protects fmr/fastreg pool - * @union.fmr: - * @pool: FMR pool for fast registrations - * @page_vec: page vector to hold mapped commands pages - * used for registration - * @union.fastreg: - * @pool: Fast registration descriptors pool for fast - * registrations - * @pool_size: Size of pool + * @fr_pool: connection fast registration poool */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -438,17 +491,7 @@ struct ib_conn { bool pi_support; struct ib_send_wr beacon; struct completion flush_comp; - spinlock_t lock; - union { - struct { - struct ib_fmr_pool *pool; - struct iser_page_vec *page_vec; - } fmr; - struct { - struct list_head pool; - int pool_size; - } fastreg; - }; + struct iser_fr_pool fr_pool; }; /** @@ -479,6 +522,8 @@ struct ib_conn { * @rx_desc_head: head of rx_descs cyclic buffer * @rx_descs: rx buffers array (cyclic buffer) * @num_rx_descs: number of rx descriptors + * @scsi_sg_tablesize: scsi host sg_tablesize + * @scsi_max_sectors: scsi host max sectors */ struct iser_conn { struct ib_conn ib_conn; @@ -503,6 +548,8 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; u32 num_rx_descs; + unsigned short scsi_sg_tablesize; + unsigned int scsi_max_sectors; }; /** @@ -558,6 +605,10 @@ extern struct iser_global ig; extern int iser_debug_level; extern bool iser_pi_enable; extern int iser_pi_guard; +extern unsigned int iser_max_sectors; +extern bool iser_always_reg; + +int iser_assign_reg_ops(struct iser_device *device); int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task); @@ -599,10 +650,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem(struct iscsi_iser_task *task, + enum iser_data_dir dir); +void iser_unreg_rdma_mem(struct iscsi_iser_task *task, + enum iser_data_dir dir); int iser_connect(struct iser_conn *iser_conn, struct sockaddr *src_addr, @@ -632,15 +683,40 @@ int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, struct iscsi_session *session); -int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max); +int iser_alloc_fmr_pool(struct ib_conn *ib_conn, + unsigned cmds_max, + unsigned int size); void iser_free_fmr_pool(struct ib_conn *ib_conn); -int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max); +int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, + unsigned cmds_max, + unsigned int size); void iser_free_fastreg_pool(struct ib_conn *ib_conn); u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector); -struct fast_reg_descriptor * -iser_reg_desc_get(struct ib_conn *ib_conn); +struct iser_fr_desc * +iser_reg_desc_get_fr(struct ib_conn *ib_conn); void -iser_reg_desc_put(struct ib_conn *ib_conn, - struct fast_reg_descriptor *desc); +iser_reg_desc_put_fr(struct ib_conn *ib_conn, + struct iser_fr_desc *desc); +struct iser_fr_desc * +iser_reg_desc_get_fmr(struct ib_conn *ib_conn); +void +iser_reg_desc_put_fmr(struct ib_conn *ib_conn, + struct iser_fr_desc *desc); + +static inline struct ib_send_wr * +iser_tx_next_wr(struct iser_tx_desc *tx_desc) +{ + struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send; + struct ib_send_wr *last_wr; + + if (tx_desc->wr_idx) { + last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send; + last_wr->next = cur_wr; + } + tx_desc->wr_idx++; + + return cur_wr; +} + #endif diff --git a/kernel/drivers/infiniband/ulp/iser/iser_initiator.c b/kernel/drivers/infiniband/ulp/iser/iser_initiator.c index 0a47f42fe..ffd00c420 100644 --- a/kernel/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/kernel/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,7 +49,6 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_mem_reg *mem_reg; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -73,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) return err; } - err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); + err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; @@ -103,7 +102,6 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_mem_reg *mem_reg; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -128,7 +126,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, return err; } - err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); + err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; @@ -170,13 +168,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn, memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); tx_desc->iser_header.flags = ISER_VER; - tx_desc->num_sge = 1; - - if (tx_desc->tx_sg[0].lkey != device->mr->lkey) { - tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc); - } } static void iser_free_login_buf(struct iser_conn *iser_conn) @@ -266,7 +258,8 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; - if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max)) + if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max, + iser_conn->scsi_sg_tablesize)) goto create_rdma_reg_res_failed; if (iser_alloc_login_buf(iser_conn)) @@ -291,7 +284,7 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, rx_sg = &rx_desc->rx_sg; rx_sg->addr = rx_desc->dma_addr; rx_sg->length = ISER_RX_PAYLOAD_SIZE; - rx_sg->lkey = device->mr->lkey; + rx_sg->lkey = device->pd->local_dma_lkey; } iser_conn->rx_desc_head = 0; @@ -307,7 +300,7 @@ rx_desc_dma_map_failed: rx_desc_alloc_fail: iser_free_login_buf(iser_conn); alloc_login_buf_fail: - device->iser_free_rdma_reg_res(ib_conn); + device->reg_ops->free_reg_res(ib_conn); create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; @@ -320,8 +313,8 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; - if (device->iser_free_rdma_reg_res) - device->iser_free_rdma_reg_res(ib_conn); + if (device->reg_ops->free_reg_res) + device->reg_ops->free_reg_res(ib_conn); rx_desc = iser_conn->rx_descs; for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) @@ -545,7 +538,7 @@ int iser_send_control(struct iscsi_conn *conn, tx_dsg->addr = iser_conn->login_req_dma; tx_dsg->length = task->data_count; - tx_dsg->lkey = device->mr->lkey; + tx_dsg->lkey = device->pd->local_dma_lkey; mdesc->num_sge = 2; } @@ -668,61 +661,25 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - struct iser_device *device = iser_task->iser_conn->ib_conn.device; - int is_rdma_data_aligned = 1; - int is_rdma_prot_aligned = 1; int prot_count = scsi_prot_sg_count(iser_task->sc); - /* if we were reading, copy back to unaligned sglist, - * anyway dma_unmap and free the copy - */ - if (iser_task->data[ISER_DIR_IN].orig_sg) { - is_rdma_data_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->data[ISER_DIR_IN], - ISER_DIR_IN); - } - - if (iser_task->data[ISER_DIR_OUT].orig_sg) { - is_rdma_data_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->data[ISER_DIR_OUT], - ISER_DIR_OUT); - } - - if (iser_task->prot[ISER_DIR_IN].orig_sg) { - is_rdma_prot_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->prot[ISER_DIR_IN], - ISER_DIR_IN); - } - - if (iser_task->prot[ISER_DIR_OUT].orig_sg) { - is_rdma_prot_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->prot[ISER_DIR_OUT], - ISER_DIR_OUT); - } - if (iser_task->dir[ISER_DIR_IN]) { - device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); - if (is_rdma_data_aligned) - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_IN], - DMA_FROM_DEVICE); - if (prot_count && is_rdma_prot_aligned) + iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_IN], + DMA_FROM_DEVICE); + if (prot_count) iser_dma_unmap_task_data(iser_task, &iser_task->prot[ISER_DIR_IN], DMA_FROM_DEVICE); } if (iser_task->dir[ISER_DIR_OUT]) { - device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); - if (is_rdma_data_aligned) - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_OUT], - DMA_TO_DEVICE); - if (prot_count && is_rdma_prot_aligned) + iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_OUT], + DMA_TO_DEVICE); + if (prot_count) iser_dma_unmap_task_data(iser_task, &iser_task->prot[ISER_DIR_OUT], DMA_TO_DEVICE); diff --git a/kernel/drivers/infiniband/ulp/iser/iser_memory.c b/kernel/drivers/infiniband/ulp/iser/iser_memory.c index f0cdc961e..ea765fb96 100644 --- a/kernel/drivers/infiniband/ulp/iser/iser_memory.c +++ b/kernel/drivers/infiniband/ulp/iser/iser_memory.c @@ -38,194 +38,97 @@ #include <linux/scatterlist.h> #include "iscsi_iser.h" +static +int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, + struct iser_data_buf *mem, + struct iser_reg_resources *rsc, + struct iser_mem_reg *mem_reg); +static +int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, + struct iser_data_buf *mem, + struct iser_reg_resources *rsc, + struct iser_mem_reg *mem_reg); + +static struct iser_reg_ops fastreg_ops = { + .alloc_reg_res = iser_alloc_fastreg_pool, + .free_reg_res = iser_free_fastreg_pool, + .reg_mem = iser_fast_reg_mr, + .unreg_mem = iser_unreg_mem_fastreg, + .reg_desc_get = iser_reg_desc_get_fr, + .reg_desc_put = iser_reg_desc_put_fr, +}; -static void -iser_free_bounce_sg(struct iser_data_buf *data) -{ - struct scatterlist *sg; - int count; - - for_each_sg(data->sg, sg, data->size, count) - __free_page(sg_page(sg)); - - kfree(data->sg); - - data->sg = data->orig_sg; - data->size = data->orig_size; - data->orig_sg = NULL; - data->orig_size = 0; -} +static struct iser_reg_ops fmr_ops = { + .alloc_reg_res = iser_alloc_fmr_pool, + .free_reg_res = iser_free_fmr_pool, + .reg_mem = iser_fast_reg_fmr, + .unreg_mem = iser_unreg_mem_fmr, + .reg_desc_get = iser_reg_desc_get_fmr, + .reg_desc_put = iser_reg_desc_put_fmr, +}; -static int -iser_alloc_bounce_sg(struct iser_data_buf *data) +int iser_assign_reg_ops(struct iser_device *device) { - struct scatterlist *sg; - struct page *page; - unsigned long length = data->data_len; - int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE); - - sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC); - if (!sg) - goto err; - - sg_init_table(sg, nents); - while (length) { - u32 page_len = min_t(u32, length, PAGE_SIZE); - - page = alloc_page(GFP_ATOMIC); - if (!page) - goto err; - - sg_set_page(&sg[i], page, page_len, 0); - length -= page_len; - i++; + struct ib_device_attr *dev_attr = &device->dev_attr; + + /* Assign function handles - based on FMR support */ + if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && + device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { + iser_info("FMR supported, using FMR for registration\n"); + device->reg_ops = &fmr_ops; + } else + if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + iser_info("FastReg supported, using FastReg for registration\n"); + device->reg_ops = &fastreg_ops; + } else { + iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); + return -1; } - data->orig_sg = data->sg; - data->orig_size = data->size; - data->sg = sg; - data->size = nents; - return 0; - -err: - for (; i > 0; i--) - __free_page(sg_page(&sg[i - 1])); - kfree(sg); - - return -ENOMEM; -} - -static void -iser_copy_bounce(struct iser_data_buf *data, bool to_buffer) -{ - struct scatterlist *osg, *bsg = data->sg; - void *oaddr, *baddr; - unsigned int left = data->data_len; - unsigned int bsg_off = 0; - int i; - - for_each_sg(data->orig_sg, osg, data->orig_size, i) { - unsigned int copy_len, osg_off = 0; - - oaddr = kmap_atomic(sg_page(osg)) + osg->offset; - copy_len = min(left, osg->length); - while (copy_len) { - unsigned int len = min(copy_len, bsg->length - bsg_off); - - baddr = kmap_atomic(sg_page(bsg)) + bsg->offset; - if (to_buffer) - memcpy(baddr + bsg_off, oaddr + osg_off, len); - else - memcpy(oaddr + osg_off, baddr + bsg_off, len); - - kunmap_atomic(baddr - bsg->offset); - osg_off += len; - bsg_off += len; - copy_len -= len; - - if (bsg_off >= bsg->length) { - bsg = sg_next(bsg); - bsg_off = 0; - } - } - kunmap_atomic(oaddr - osg->offset); - left -= osg_off; - } -} - -static inline void -iser_copy_from_bounce(struct iser_data_buf *data) -{ - iser_copy_bounce(data, false); -} - -static inline void -iser_copy_to_bounce(struct iser_data_buf *data) -{ - iser_copy_bounce(data, true); } -struct fast_reg_descriptor * -iser_reg_desc_get(struct ib_conn *ib_conn) +struct iser_fr_desc * +iser_reg_desc_get_fr(struct ib_conn *ib_conn) { - struct fast_reg_descriptor *desc; + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; + struct iser_fr_desc *desc; unsigned long flags; - spin_lock_irqsave(&ib_conn->lock, flags); - desc = list_first_entry(&ib_conn->fastreg.pool, - struct fast_reg_descriptor, list); + spin_lock_irqsave(&fr_pool->lock, flags); + desc = list_first_entry(&fr_pool->list, + struct iser_fr_desc, list); list_del(&desc->list); - spin_unlock_irqrestore(&ib_conn->lock, flags); + spin_unlock_irqrestore(&fr_pool->lock, flags); return desc; } void -iser_reg_desc_put(struct ib_conn *ib_conn, - struct fast_reg_descriptor *desc) +iser_reg_desc_put_fr(struct ib_conn *ib_conn, + struct iser_fr_desc *desc) { + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; unsigned long flags; - spin_lock_irqsave(&ib_conn->lock, flags); - list_add(&desc->list, &ib_conn->fastreg.pool); - spin_unlock_irqrestore(&ib_conn->lock, flags); + spin_lock_irqsave(&fr_pool->lock, flags); + list_add(&desc->list, &fr_pool->list); + spin_unlock_irqrestore(&fr_pool->lock, flags); } -/** - * iser_start_rdma_unaligned_sg - */ -static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir cmd_dir) +struct iser_fr_desc * +iser_reg_desc_get_fmr(struct ib_conn *ib_conn) { - struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; - int rc; - - rc = iser_alloc_bounce_sg(data); - if (rc) { - iser_err("Failed to allocate bounce for data len %lu\n", - data->data_len); - return rc; - } - - if (cmd_dir == ISER_DIR_OUT) - iser_copy_to_bounce(data); - - data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, - (cmd_dir == ISER_DIR_OUT) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (!data->dma_nents) { - iser_err("Got dma_nents %d, something went wrong...\n", - data->dma_nents); - rc = -ENOMEM; - goto err; - } + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; - return 0; -err: - iser_free_bounce_sg(data); - return rc; + return list_first_entry(&fr_pool->list, + struct iser_fr_desc, list); } -/** - * iser_finalize_rdma_unaligned_sg - */ - -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir cmd_dir) +void +iser_reg_desc_put_fmr(struct ib_conn *ib_conn, + struct iser_fr_desc *desc) { - struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; - - ib_dma_unmap_sg(dev, data->sg, data->size, - (cmd_dir == ISER_DIR_OUT) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - - if (cmd_dir == ISER_DIR_IN) - iser_copy_from_bounce(data); - - iser_free_bounce_sg(data); } #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) @@ -289,52 +192,6 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, return cur_page; } - -/** - * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned - * for RDMA sub-list of a scatter-gather list of memory buffers, and returns - * the number of entries which are aligned correctly. Supports the case where - * consecutive SG elements are actually fragments of the same physcial page. - */ -static int iser_data_buf_aligned_len(struct iser_data_buf *data, - struct ib_device *ibdev) -{ - struct scatterlist *sg, *sgl, *next_sg = NULL; - u64 start_addr, end_addr; - int i, ret_len, start_check = 0; - - if (data->dma_nents == 1) - return 1; - - sgl = data->sg; - start_addr = ib_sg_dma_address(ibdev, sgl); - - for_each_sg(sgl, sg, data->dma_nents, i) { - if (start_check && !IS_4K_ALIGNED(start_addr)) - break; - - next_sg = sg_next(sg); - if (!next_sg) - break; - - end_addr = start_addr + ib_sg_dma_len(ibdev, sg); - start_addr = ib_sg_dma_address(ibdev, next_sg); - - if (end_addr == start_addr) { - start_check = 0; - continue; - } else - start_check = 1; - - if (!IS_4K_ALIGNED(end_addr)) - break; - } - ret_len = (next_sg) ? i : i+1; - iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n", - ret_len, data->dma_nents, data); - return ret_len; -} - static void iser_data_buf_dump(struct iser_data_buf *data, struct ib_device *ibdev) { @@ -393,7 +250,7 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, { struct scatterlist *sg = mem->sg; - reg->sge.lkey = device->mr->lkey; + reg->sge.lkey = device->pd->local_dma_lkey; reg->rkey = device->mr->rkey; reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); @@ -405,47 +262,21 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, return 0; } -static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - enum iser_data_dir cmd_dir, - int aligned_len) -{ - struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; - struct iser_device *device = iser_task->iser_conn->ib_conn.device; - - iscsi_conn->fmr_unalign_cnt++; - iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", - aligned_len, mem->size); - - if (iser_debug_level > 0) - iser_data_buf_dump(mem, device->ib_device); - - /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task, mem, - (cmd_dir == ISER_DIR_OUT) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - - /* allocate copy buf, if we are writing, copy the */ - /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0) - return -ENOMEM; - - return 0; -} - /** * iser_reg_page_vec - Register physical memory * * returns: 0 on success, errno code on failure */ static -int iser_reg_page_vec(struct iscsi_iser_task *iser_task, +int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, - struct iser_page_vec *page_vec, - struct iser_mem_reg *mem_reg) + struct iser_reg_resources *rsc, + struct iser_mem_reg *reg) { struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; + struct iser_page_vec *page_vec = rsc->page_vec; + struct ib_fmr_pool *fmr_pool = rsc->fmr_pool; struct ib_pool_fmr *fmr; int ret, plen; @@ -461,7 +292,7 @@ int iser_reg_page_vec(struct iscsi_iser_task *iser_task, return -EINVAL; } - fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool, + fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages, page_vec->length, page_vec->pages[0]); @@ -471,11 +302,15 @@ int iser_reg_page_vec(struct iscsi_iser_task *iser_task, return ret; } - mem_reg->sge.lkey = fmr->fmr->lkey; - mem_reg->rkey = fmr->fmr->rkey; - mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset; - mem_reg->sge.length = page_vec->data_size; - mem_reg->mem_h = fmr; + reg->sge.lkey = fmr->fmr->lkey; + reg->rkey = fmr->fmr->rkey; + reg->sge.addr = page_vec->pages[0] + page_vec->offset; + reg->sge.length = page_vec->data_size; + reg->mem_h = fmr; + + iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx," + " length=0x%x\n", reg->sge.lkey, reg->rkey, + reg->sge.addr, reg->sge.length); return 0; } @@ -505,71 +340,17 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { + struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; if (!reg->mem_h) return; - iser_reg_desc_put(&iser_task->iser_conn->ib_conn, - reg->mem_h); + device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, + reg->mem_h); reg->mem_h = NULL; } -/** - * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA, - * using FMR (if possible) obtaining rkey and va - * - * returns 0 on success, errno code on failure - */ -int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) -{ - struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; - struct ib_device *ibdev = device->ib_device; - struct iser_data_buf *mem = &iser_task->data[cmd_dir]; - struct iser_mem_reg *mem_reg; - int aligned_len; - int err; - int i; - - mem_reg = &iser_task->rdma_reg[cmd_dir]; - - aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, mem, - cmd_dir, aligned_len); - if (err) { - iser_err("failed to allocate bounce buffer\n"); - return err; - } - } - - /* if there a single dma entry, FMR is not needed */ - if (mem->dma_nents == 1) { - return iser_reg_dma(device, mem, mem_reg); - } else { /* use FMR for multiple dma entries */ - err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec, - mem_reg); - if (err && err != -EAGAIN) { - iser_data_buf_dump(mem, ibdev); - iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", - mem->dma_nents, - ntoh24(iser_task->desc.iscsi_header.dlength)); - iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->fmr.page_vec->data_size, - ib_conn->fmr.page_vec->length, - ib_conn->fmr.page_vec->offset); - for (i = 0; i < ib_conn->fmr.page_vec->length; i++) - iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long)ib_conn->fmr.page_vec->pages[i]); - } - if (err) - return err; - } - return 0; -} - static void iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, struct ib_sig_domain *domain) @@ -637,10 +418,11 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) { u32 rkey; - memset(inv_wr, 0, sizeof(*inv_wr)); inv_wr->opcode = IB_WR_LOCAL_INV; inv_wr->wr_id = ISER_FASTREG_LI_WRID; inv_wr->ex.invalidate_rkey = mr->rkey; + inv_wr->send_flags = 0; + inv_wr->num_sge = 0; rkey = ib_inc_rkey(mr->rkey); ib_update_fast_reg_key(mr, rkey); @@ -648,61 +430,49 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, - struct fast_reg_descriptor *desc, + struct iser_pi_context *pi_ctx, struct iser_mem_reg *data_reg, struct iser_mem_reg *prot_reg, struct iser_mem_reg *sig_reg) { - struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; - struct iser_pi_context *pi_ctx = desc->pi_ctx; - struct ib_send_wr sig_wr, inv_wr; - struct ib_send_wr *bad_wr, *wr = NULL; - struct ib_sig_attrs sig_attrs; + struct iser_tx_desc *tx_desc = &iser_task->desc; + struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; + struct ib_sig_handover_wr *wr; int ret; - memset(&sig_attrs, 0, sizeof(sig_attrs)); - ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs); + memset(sig_attrs, 0, sizeof(*sig_attrs)); + ret = iser_set_sig_attrs(iser_task->sc, sig_attrs); if (ret) goto err; - iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask); + iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) { - iser_inv_rkey(&inv_wr, pi_ctx->sig_mr); - wr = &inv_wr; - } + if (!pi_ctx->sig_mr_valid) + iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr); - memset(&sig_wr, 0, sizeof(sig_wr)); - sig_wr.opcode = IB_WR_REG_SIG_MR; - sig_wr.wr_id = ISER_FASTREG_LI_WRID; - sig_wr.sg_list = &data_reg->sge; - sig_wr.num_sge = 1; - sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; - sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; + wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); + wr->wr.opcode = IB_WR_REG_SIG_MR; + wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.sg_list = &data_reg->sge; + wr->wr.num_sge = 1; + wr->wr.send_flags = 0; + wr->sig_attrs = sig_attrs; + wr->sig_mr = pi_ctx->sig_mr; if (scsi_prot_sg_count(iser_task->sc)) - sig_wr.wr.sig_handover.prot = &prot_reg->sge; - sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; - - if (!wr) - wr = &sig_wr; + wr->prot = &prot_reg->sge; else - wr->next = &sig_wr; - - ret = ib_post_send(ib_conn->qp, wr, &bad_wr); - if (ret) { - iser_err("reg_sig_mr failed, ret:%d\n", ret); - goto err; - } - desc->reg_indicators &= ~ISER_SIG_KEY_VALID; + wr->prot = NULL; + wr->access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + pi_ctx->sig_mr_valid = 0; sig_reg->sge.lkey = pi_ctx->sig_mr->lkey; sig_reg->rkey = pi_ctx->sig_mr->rkey; sig_reg->sge.addr = 0; sig_reg->sge.length = scsi_transfer_length(iser_task->sc); - iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n", + iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n", sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr, sig_reg->sge.length); err: @@ -711,149 +481,139 @@ err: static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_data_buf *mem, - struct fast_reg_descriptor *desc, - enum iser_reg_indicator ind, + struct iser_reg_resources *rsc, struct iser_mem_reg *reg) { - struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; - struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; - struct ib_send_wr fastreg_wr, inv_wr; - struct ib_send_wr *bad_wr, *wr = NULL; - int ret, offset, size, plen; - - /* if there a single dma entry, dma mr suffices */ - if (mem->dma_nents == 1) - return iser_reg_dma(device, mem, reg); - - if (ind == ISER_DATA_KEY_VALID) { - mr = desc->data_mr; - frpl = desc->data_frpl; - } else { - mr = desc->pi_ctx->prot_mr; - frpl = desc->pi_ctx->prot_frpl; + struct iser_tx_desc *tx_desc = &iser_task->desc; + struct ib_mr *mr = rsc->mr; + struct ib_reg_wr *wr; + int n; + + if (!rsc->mr_valid) + iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); + + n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); + if (unlikely(n != mem->size)) { + iser_err("failed to map sg (%d/%d)\n", + n, mem->size); + return n < 0 ? n : -EINVAL; } - plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, - &offset, &size); - if (plen * SIZE_4K < size) { - iser_err("fast reg page_list too short to hold this SG\n"); - return -EINVAL; - } + wr = reg_wr(iser_tx_next_wr(tx_desc)); + wr->wr.opcode = IB_WR_REG_MR; + wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.send_flags = 0; + wr->wr.num_sge = 0; + wr->mr = mr; + wr->key = mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; - if (!(desc->reg_indicators & ind)) { - iser_inv_rkey(&inv_wr, mr); - wr = &inv_wr; - } - - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr_id = ISER_FASTREG_LI_WRID; - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset; - fastreg_wr.wr.fast_reg.page_list = frpl; - fastreg_wr.wr.fast_reg.page_list_len = plen; - fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; - fastreg_wr.wr.fast_reg.length = size; - fastreg_wr.wr.fast_reg.rkey = mr->rkey; - fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - - if (!wr) - wr = &fastreg_wr; - else - wr->next = &fastreg_wr; - - ret = ib_post_send(ib_conn->qp, wr, &bad_wr); - if (ret) { - iser_err("fast registration failed, ret:%d\n", ret); - return ret; - } - desc->reg_indicators &= ~ind; + rsc->mr_valid = 0; reg->sge.lkey = mr->lkey; reg->rkey = mr->rkey; - reg->sge.addr = frpl->page_list[0] + offset; - reg->sge.length = size; + reg->sge.addr = mr->iova; + reg->sge.length = mr->length; - return ret; + iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n", + reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length); + + return 0; } -/** - * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA, - * using Fast Registration WR (if possible) obtaining rkey and va - * - * returns 0 on success, errno code on failure - */ -int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +static int +iser_reg_prot_sg(struct iscsi_iser_task *task, + struct iser_data_buf *mem, + struct iser_fr_desc *desc, + bool use_dma_key, + struct iser_mem_reg *reg) { - struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; + struct iser_device *device = task->iser_conn->ib_conn.device; + + if (use_dma_key) + return iser_reg_dma(device, mem, reg); + + return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg); +} + +static int +iser_reg_data_sg(struct iscsi_iser_task *task, + struct iser_data_buf *mem, + struct iser_fr_desc *desc, + bool use_dma_key, + struct iser_mem_reg *reg) +{ + struct iser_device *device = task->iser_conn->ib_conn.device; + + if (use_dma_key) + return iser_reg_dma(device, mem, reg); + + return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg); +} + +int iser_reg_rdma_mem(struct iscsi_iser_task *task, + enum iser_data_dir dir) +{ + struct ib_conn *ib_conn = &task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; - struct ib_device *ibdev = device->ib_device; - struct iser_data_buf *mem = &iser_task->data[cmd_dir]; - struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir]; - struct fast_reg_descriptor *desc = NULL; - int err, aligned_len; - - aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, mem, - cmd_dir, aligned_len); - if (err) { - iser_err("failed to allocate bounce buffer\n"); - return err; - } - } + struct iser_data_buf *mem = &task->data[dir]; + struct iser_mem_reg *reg = &task->rdma_reg[dir]; + struct iser_mem_reg *data_reg; + struct iser_fr_desc *desc = NULL; + bool use_dma_key; + int err; - if (mem->dma_nents != 1 || - scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { - desc = iser_reg_desc_get(ib_conn); - mem_reg->mem_h = desc; + use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && + scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); + + if (!use_dma_key) { + desc = device->reg_ops->reg_desc_get(ib_conn); + reg->mem_h = desc; } - err = iser_fast_reg_mr(iser_task, mem, desc, - ISER_DATA_KEY_VALID, mem_reg); - if (err) + if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) + data_reg = reg; + else + data_reg = &task->desc.data_reg; + + err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg); + if (unlikely(err)) goto err_reg; - if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { - struct iser_mem_reg prot_reg; - - memset(&prot_reg, 0, sizeof(prot_reg)); - if (scsi_prot_sg_count(iser_task->sc)) { - mem = &iser_task->prot[cmd_dir]; - aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, mem, - cmd_dir, aligned_len); - if (err) { - iser_err("failed to allocate bounce buffer\n"); - return err; - } - } - - err = iser_fast_reg_mr(iser_task, mem, desc, - ISER_PROT_KEY_VALID, &prot_reg); - if (err) + if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) { + struct iser_mem_reg *prot_reg = &task->desc.prot_reg; + + if (scsi_prot_sg_count(task->sc)) { + mem = &task->prot[dir]; + err = iser_reg_prot_sg(task, mem, desc, + use_dma_key, prot_reg); + if (unlikely(err)) goto err_reg; } - err = iser_reg_sig_mr(iser_task, desc, mem_reg, - &prot_reg, mem_reg); - if (err) { - iser_err("Failed to register signature mr\n"); - return err; - } - desc->reg_indicators |= ISER_FASTREG_PROTECTED; + err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg, + prot_reg, reg); + if (unlikely(err)) + goto err_reg; + + desc->pi_ctx->sig_protected = 1; } return 0; + err_reg: if (desc) - iser_reg_desc_put(ib_conn, desc); + device->reg_ops->reg_desc_put(ib_conn, desc); return err; } + +void iser_unreg_rdma_mem(struct iscsi_iser_task *task, + enum iser_data_dir dir) +{ + struct iser_device *device = task->iser_conn->ib_conn.device; + + device->reg_ops->unreg_mem(task, dir); +} diff --git a/kernel/drivers/infiniband/ulp/iser/iser_verbs.c b/kernel/drivers/infiniband/ulp/iser/iser_verbs.c index cc2dd35ff..42f4da620 100644 --- a/kernel/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/kernel/drivers/infiniband/ulp/iser/iser_verbs.c @@ -51,19 +51,22 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context); static void iser_cq_event_callback(struct ib_event *cause, void *context) { - iser_err("got cq event %d \n", cause->event); + iser_err("cq event %s (%d)\n", + ib_event_msg(cause->event), cause->event); } static void iser_qp_event_callback(struct ib_event *cause, void *context) { - iser_err("got qp event %d\n",cause->event); + iser_err("qp event %s (%d)\n", + ib_event_msg(cause->event), cause->event); } static void iser_event_handler(struct ib_event_handler *handler, struct ib_event *event) { - iser_err("async event %d on device %s port %d\n", event->event, - event->device->name, event->element.port_num); + iser_err("async event %s (%d) on device %s port %d\n", + ib_event_msg(event->event), event->event, + event->device->name, event->element.port_num); } /** @@ -84,25 +87,9 @@ static int iser_create_device_ib_res(struct iser_device *device) return ret; } - /* Assign function handles - based on FMR support */ - if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && - device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { - iser_info("FMR supported, using FMR for registration\n"); - device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; - device->iser_free_rdma_reg_res = iser_free_fmr_pool; - device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; - device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; - } else - if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - iser_info("FastReg supported, using FastReg for registration\n"); - device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool; - device->iser_free_rdma_reg_res = iser_free_fastreg_pool; - device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg; - device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg; - } else { - iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); - return -1; - } + ret = iser_assign_reg_ops(device); + if (ret) + return ret; device->comps_used = min_t(int, num_online_cpus(), device->ib_device->num_comp_vectors); @@ -123,14 +110,17 @@ static int iser_create_device_ib_res(struct iser_device *device) goto pd_err; for (i = 0; i < device->comps_used; i++) { + struct ib_cq_init_attr cq_attr = {}; struct iser_comp *comp = &device->comps[i]; comp->device = device; + cq_attr.cqe = max_cqe; + cq_attr.comp_vector = i; comp->cq = ib_create_cq(device->ib_device, iser_cq_callback, iser_cq_event_callback, (void *)comp, - max_cqe, i); + &cq_attr); if (IS_ERR(comp->cq)) { comp->cq = NULL; goto cq_err; @@ -143,11 +133,15 @@ static int iser_create_device_ib_res(struct iser_device *device) (unsigned long)comp); } - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - if (IS_ERR(device->mr)) - goto dma_mr_err; + if (!iser_always_reg) { + int access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + + device->mr = ib_get_dma_mr(device->pd, access); + if (IS_ERR(device->mr)) + goto dma_mr_err; + } INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, iser_event_handler); @@ -157,7 +151,8 @@ static int iser_create_device_ib_res(struct iser_device *device) return 0; handler_err: - ib_dereg_mr(device->mr); + if (device->mr) + ib_dereg_mr(device->mr); dma_mr_err: for (i = 0; i < device->comps_used; i++) tasklet_kill(&device->comps[i].tasklet); @@ -183,7 +178,6 @@ comps_err: static void iser_free_device_ib_res(struct iser_device *device) { int i; - BUG_ON(device->mr == NULL); for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; @@ -194,8 +188,9 @@ static void iser_free_device_ib_res(struct iser_device *device) } (void)ib_unregister_event_handler(&device->event_handler); - (void)ib_dereg_mr(device->mr); - (void)ib_dealloc_pd(device->pd); + if (device->mr) + (void)ib_dereg_mr(device->mr); + ib_dealloc_pd(device->pd); kfree(device->comps); device->comps = NULL; @@ -205,28 +200,40 @@ static void iser_free_device_ib_res(struct iser_device *device) } /** - * iser_create_fmr_pool - Creates FMR pool and page_vector + * iser_alloc_fmr_pool - Creates FMR pool and page_vector * * returns 0 on success, or errno code on failure */ -int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max) +int iser_alloc_fmr_pool(struct ib_conn *ib_conn, + unsigned cmds_max, + unsigned int size) { struct iser_device *device = ib_conn->device; + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; + struct iser_page_vec *page_vec; + struct iser_fr_desc *desc; + struct ib_fmr_pool *fmr_pool; struct ib_fmr_pool_param params; - int ret = -ENOMEM; + int ret; - ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + - (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), - GFP_KERNEL); - if (!ib_conn->fmr.page_vec) - return ret; + INIT_LIST_HEAD(&fr_pool->list); + spin_lock_init(&fr_pool->lock); + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + page_vec = kmalloc(sizeof(*page_vec) + (sizeof(u64) * size), + GFP_KERNEL); + if (!page_vec) { + ret = -ENOMEM; + goto err_frpl; + } - ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1); + page_vec->pages = (u64 *)(page_vec + 1); params.page_shift = SHIFT_4K; - /* when the first/last SG element are not start/end * - * page aligned, the map whould be of N+1 pages */ - params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; + params.max_pages_per_fmr = size; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ params.pool_size = cmds_max * 2; @@ -237,23 +244,25 @@ int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max) IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); - if (!IS_ERR(ib_conn->fmr.pool)) - return 0; - - /* no FMR => no need for page_vec */ - kfree(ib_conn->fmr.page_vec); - ib_conn->fmr.page_vec = NULL; - - ret = PTR_ERR(ib_conn->fmr.pool); - ib_conn->fmr.pool = NULL; - if (ret != -ENOSYS) { + fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); + if (IS_ERR(fmr_pool)) { + ret = PTR_ERR(fmr_pool); iser_err("FMR allocation failed, err %d\n", ret); - return ret; - } else { - iser_warn("FMRs are not supported, using unaligned mode\n"); - return 0; + goto err_fmr; } + + desc->rsc.page_vec = page_vec; + desc->rsc.fmr_pool = fmr_pool; + list_add(&desc->list, &fr_pool->list); + + return 0; + +err_fmr: + kfree(page_vec); +err_frpl: + kfree(desc); + + return ret; } /** @@ -261,26 +270,54 @@ int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max) */ void iser_free_fmr_pool(struct ib_conn *ib_conn) { + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; + struct iser_fr_desc *desc; + + desc = list_first_entry(&fr_pool->list, + struct iser_fr_desc, list); + list_del(&desc->list); + iser_info("freeing conn %p fmr pool %p\n", - ib_conn, ib_conn->fmr.pool); + ib_conn, desc->rsc.fmr_pool); + + ib_destroy_fmr_pool(desc->rsc.fmr_pool); + kfree(desc->rsc.page_vec); + kfree(desc); +} + +static int +iser_alloc_reg_res(struct ib_device *ib_device, + struct ib_pd *pd, + struct iser_reg_resources *res, + unsigned int size) +{ + int ret; - if (ib_conn->fmr.pool != NULL) - ib_destroy_fmr_pool(ib_conn->fmr.pool); + res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); + if (IS_ERR(res->mr)) { + ret = PTR_ERR(res->mr); + iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); + return ret; + } + res->mr_valid = 1; - ib_conn->fmr.pool = NULL; + return 0; +} - kfree(ib_conn->fmr.page_vec); - ib_conn->fmr.page_vec = NULL; +static void +iser_free_reg_res(struct iser_reg_resources *rsc) +{ + ib_dereg_mr(rsc->mr); } static int -iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd, - struct fast_reg_descriptor *desc) +iser_alloc_pi_ctx(struct ib_device *ib_device, + struct ib_pd *pd, + struct iser_fr_desc *desc, + unsigned int size) { struct iser_pi_context *pi_ctx = NULL; - struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2, - .flags = IB_MR_SIGNATURE_EN}; - int ret = 0; + int ret; desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); if (!desc->pi_ctx) @@ -288,36 +325,25 @@ iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd, pi_ctx = desc->pi_ctx; - pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(pi_ctx->prot_frpl)) { - ret = PTR_ERR(pi_ctx->prot_frpl); - goto prot_frpl_failure; - } - - pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(pi_ctx->prot_mr)) { - ret = PTR_ERR(pi_ctx->prot_mr); - goto prot_mr_failure; + ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size); + if (ret) { + iser_err("failed to allocate reg_resources\n"); + goto alloc_reg_res_err; } - desc->reg_indicators |= ISER_PROT_KEY_VALID; - pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); + pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2); if (IS_ERR(pi_ctx->sig_mr)) { ret = PTR_ERR(pi_ctx->sig_mr); goto sig_mr_failure; } - desc->reg_indicators |= ISER_SIG_KEY_VALID; - desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + pi_ctx->sig_mr_valid = 1; + desc->pi_ctx->sig_protected = 0; return 0; sig_mr_failure: - ib_dereg_mr(desc->pi_ctx->prot_mr); -prot_mr_failure: - ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); -prot_frpl_failure: + iser_free_reg_res(&pi_ctx->rsc); +alloc_reg_res_err: kfree(desc->pi_ctx); return ret; @@ -326,82 +352,71 @@ prot_frpl_failure: static void iser_free_pi_ctx(struct iser_pi_context *pi_ctx) { - ib_free_fast_reg_page_list(pi_ctx->prot_frpl); - ib_dereg_mr(pi_ctx->prot_mr); - ib_destroy_mr(pi_ctx->sig_mr); + iser_free_reg_res(&pi_ctx->rsc); + ib_dereg_mr(pi_ctx->sig_mr); kfree(pi_ctx); } -static int -iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, - bool pi_enable, struct fast_reg_descriptor *desc) +static struct iser_fr_desc * +iser_create_fastreg_desc(struct ib_device *ib_device, + struct ib_pd *pd, + bool pi_enable, + unsigned int size) { + struct iser_fr_desc *desc; int ret; - desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc->data_frpl)) { - ret = PTR_ERR(desc->data_frpl); - iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", - ret); - return PTR_ERR(desc->data_frpl); - } + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); - desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc->data_mr)) { - ret = PTR_ERR(desc->data_mr); - iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - goto fast_reg_mr_failure; - } - desc->reg_indicators |= ISER_DATA_KEY_VALID; + ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size); + if (ret) + goto reg_res_alloc_failure; if (pi_enable) { - ret = iser_alloc_pi_ctx(ib_device, pd, desc); + ret = iser_alloc_pi_ctx(ib_device, pd, desc, size); if (ret) goto pi_ctx_alloc_failure; } - return 0; + return desc; + pi_ctx_alloc_failure: - ib_dereg_mr(desc->data_mr); -fast_reg_mr_failure: - ib_free_fast_reg_page_list(desc->data_frpl); + iser_free_reg_res(&desc->rsc); +reg_res_alloc_failure: + kfree(desc); - return ret; + return ERR_PTR(ret); } /** - * iser_create_fastreg_pool - Creates pool of fast_reg descriptors + * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. * returns 0 on success, or errno code on failure */ -int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max) +int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, + unsigned cmds_max, + unsigned int size) { struct iser_device *device = ib_conn->device; - struct fast_reg_descriptor *desc; + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; + struct iser_fr_desc *desc; int i, ret; - INIT_LIST_HEAD(&ib_conn->fastreg.pool); - ib_conn->fastreg.pool_size = 0; + INIT_LIST_HEAD(&fr_pool->list); + spin_lock_init(&fr_pool->lock); + fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { - desc = kzalloc(sizeof(*desc), GFP_KERNEL); - if (!desc) { - iser_err("Failed to allocate a new fast_reg descriptor\n"); - ret = -ENOMEM; - goto err; - } - - ret = iser_create_fastreg_desc(device->ib_device, device->pd, - ib_conn->pi_support, desc); - if (ret) { - iser_err("Failed to create fastreg descriptor err=%d\n", - ret); - kfree(desc); + desc = iser_create_fastreg_desc(device->ib_device, device->pd, + ib_conn->pi_support, size); + if (IS_ERR(desc)) { + ret = PTR_ERR(desc); goto err; } - list_add_tail(&desc->list, &ib_conn->fastreg.pool); - ib_conn->fastreg.pool_size++; + list_add_tail(&desc->list, &fr_pool->list); + fr_pool->size++; } return 0; @@ -416,27 +431,27 @@ err: */ void iser_free_fastreg_pool(struct ib_conn *ib_conn) { - struct fast_reg_descriptor *desc, *tmp; + struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; + struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&ib_conn->fastreg.pool)) + if (list_empty(&fr_pool->list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { + list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { list_del(&desc->list); - ib_free_fast_reg_page_list(desc->data_frpl); - ib_dereg_mr(desc->data_mr); + iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); kfree(desc); ++i; } - if (i < ib_conn->fastreg.pool_size) + if (i < fr_pool->size) iser_warn("pool still has %d regions registered\n", - ib_conn->fastreg.pool_size - i); + fr_pool->size - i); } /** @@ -732,6 +747,31 @@ static void iser_connect_error(struct rdma_cm_id *cma_id) iser_conn->state = ISER_CONN_TERMINATING; } +static void +iser_calc_scsi_params(struct iser_conn *iser_conn, + unsigned int max_sectors) +{ + struct iser_device *device = iser_conn->ib_conn.device; + unsigned short sg_tablesize, sup_sg_tablesize; + + sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); + sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, + device->dev_attr.max_fast_reg_page_list_len); + + if (sg_tablesize > sup_sg_tablesize) { + sg_tablesize = sup_sg_tablesize; + iser_conn->scsi_max_sectors = sg_tablesize * SIZE_4K / 512; + } else { + iser_conn->scsi_max_sectors = max_sectors; + } + + iser_conn->scsi_sg_tablesize = sg_tablesize; + + iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n", + iser_conn, iser_conn->scsi_sg_tablesize, + iser_conn->scsi_max_sectors); +} + /** * Called with state mutex held **/ @@ -770,6 +810,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) } } + iser_calc_scsi_params(iser_conn, iser_max_sectors); + ret = rdma_resolve_route(cma_id, 1000); if (ret) { iser_err("resolve route failed: %d\n", ret); @@ -873,8 +915,9 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve int ret = 0; iser_conn = (struct iser_conn *)cma_id->context; - iser_info("event %d status %d conn %p id %p\n", - event->event, event->status, cma_id->context, cma_id); + iser_info("%s (%d): status %d conn %p id %p\n", + rdma_event_msg(event->event), event->event, + event->status, cma_id->context, cma_id); mutex_lock(&iser_conn->state_mutex); switch (event->event) { @@ -913,7 +956,8 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve } break; default: - iser_err("Unexpected RDMA CM event (%d)\n", event->event); + iser_err("Unexpected RDMA CM event: %s (%d)\n", + rdma_event_msg(event->event), event->event); break; } mutex_unlock(&iser_conn->state_mutex); @@ -930,7 +974,6 @@ void iser_conn_init(struct iser_conn *iser_conn) init_completion(&iser_conn->ib_completion); init_completion(&iser_conn->up_completion); INIT_LIST_HEAD(&iser_conn->conn_list); - spin_lock_init(&iser_conn->ib_conn.lock); mutex_init(&iser_conn->state_mutex); } @@ -960,7 +1003,7 @@ int iser_connect(struct iser_conn *iser_conn, ib_conn->beacon.wr_id = ISER_BEACON_WRID; ib_conn->beacon.opcode = IB_WR_SEND; - ib_conn->cma_id = rdma_create_id(iser_cma_handler, + ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { @@ -1009,7 +1052,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) sge.addr = iser_conn->login_resp_dma; sge.length = ISER_RX_LOGIN_SIZE; - sge.lkey = ib_conn->device->mr->lkey; + sge.lkey = ib_conn->device->pd->local_dma_lkey; rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; rx_wr.sg_list = &sge; @@ -1064,23 +1107,24 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) { - int ib_ret; - struct ib_send_wr send_wr, *send_wr_failed; + struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc); + int ib_ret; ib_dma_sync_single_for_device(ib_conn->device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); - send_wr.next = NULL; - send_wr.wr_id = (uintptr_t)tx_desc; - send_wr.sg_list = tx_desc->tx_sg; - send_wr.num_sge = tx_desc->num_sge; - send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0; + wr->next = NULL; + wr->wr_id = (uintptr_t)tx_desc; + wr->sg_list = tx_desc->tx_sg; + wr->num_sge = tx_desc->num_sge; + wr->opcode = IB_WR_SEND; + wr->send_flags = signal ? IB_SEND_SIGNALED : 0; - ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); + ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr); if (ib_ret) - iser_err("ib_post_send failed, ret:%d\n", ib_ret); + iser_err("ib_post_send failed, ret:%d opcode:%d\n", + ib_ret, bad_wr->opcode); return ib_ret; } @@ -1173,10 +1217,13 @@ static void iser_handle_wc(struct ib_wc *wc) } } else { if (wc->status != IB_WC_WR_FLUSH_ERR) - iser_err("wr id %llx status %d vend_err %x\n", - wc->wr_id, wc->status, wc->vendor_err); + iser_err("%s (%d): wr id %llx vend_err %x\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id, wc->vendor_err); else - iser_dbg("flush error: wr id %llx\n", wc->wr_id); + iser_dbg("%s (%d): wr id %llx\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id); if (wc->wr_id == ISER_BEACON_WRID) /* all flush errors were consumed */ @@ -1229,13 +1276,13 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector) { struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; - struct fast_reg_descriptor *desc = reg->mem_h; + struct iser_fr_desc *desc = reg->mem_h; unsigned long sector_size = iser_task->sc->device->sector_size; struct ib_mr_status mr_status; int ret; - if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) { - desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + if (desc && desc->pi_ctx->sig_protected) { + desc->pi_ctx->sig_protected = 0; ret = ib_check_mr_status(desc->pi_ctx->sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { @@ -1246,7 +1293,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { sector_t sector_off = mr_status.sig_err.sig_err_offset; - do_div(sector_off, sector_size + 8); + sector_div(sector_off, sector_size + 8); *sector = scsi_get_lba(iser_task->sc) + sector_off; pr_err("PI error found type %d at sector %llx " diff --git a/kernel/drivers/infiniband/ulp/isert/ib_isert.c b/kernel/drivers/infiniband/ulp/isert/ib_isert.c index 575a072d7..8a51c3b5d 100644 --- a/kernel/drivers/infiniband/ulp/isert/ib_isert.c +++ b/kernel/drivers/infiniband/ulp/isert/ib_isert.c @@ -80,7 +80,9 @@ isert_qp_event_callback(struct ib_event *e, void *context) { struct isert_conn *isert_conn = context; - isert_err("conn %p event: %d\n", isert_conn, e->event); + isert_err("%s (%d): conn %p\n", + ib_event_msg(e->event), e->event, isert_conn); + switch (e->event) { case IB_EVENT_COMM_EST: rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); @@ -155,16 +157,9 @@ isert_create_qp(struct isert_conn *isert_conn, attr.recv_cq = comp->cq; attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; - /* - * FIXME: Use devattr.max_sge - 2 for max_send_sge as - * work-around for RDMA_READs with ConnectX-2. - * - * Also, still make sure to have at least two SGEs for - * outgoing control PDU responses. - */ - attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); - isert_conn->max_sge = attr.cap.max_send_sge; - + attr.cap.max_send_sge = device->dev_attr.max_sge; + isert_conn->max_sge = min(device->dev_attr.max_sge, + device->dev_attr.max_sge_rd); attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; @@ -233,11 +228,9 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) rx_sg = &rx_desc->rx_sg; rx_sg->addr = rx_desc->dma_addr; rx_sg->length = ISER_RX_PAYLOAD_SIZE; - rx_sg->lkey = device->mr->lkey; + rx_sg->lkey = device->pd->local_dma_lkey; } - isert_conn->rx_desc_head = 0; - return 0; dma_map_fail: @@ -318,15 +311,18 @@ isert_alloc_comps(struct isert_device *device, max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe); for (i = 0; i < device->comps_used; i++) { + struct ib_cq_init_attr cq_attr = {}; struct isert_comp *comp = &device->comps[i]; comp->device = device; INIT_WORK(&comp->work, isert_cq_work); + cq_attr.cqe = max_cqe; + cq_attr.comp_vector = i; comp->cq = ib_create_cq(device->ib_device, isert_cq_callback, isert_cq_event_callback, (void *)comp, - max_cqe, i); + &cq_attr); if (IS_ERR(comp->cq)) { isert_err("Unable to allocate cq\n"); ret = PTR_ERR(comp->cq); @@ -380,22 +376,12 @@ isert_create_device_ib_res(struct isert_device *device) goto out_cq; } - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(device->mr)) { - ret = PTR_ERR(device->mr); - isert_err("failed to create dma mr, device %p, ret=%d\n", - device, ret); - goto out_mr; - } - /* Check signature cap */ device->pi_capable = dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER ? true : false; return 0; -out_mr: - ib_dealloc_pd(device->pd); out_cq: isert_free_comps(device); return ret; @@ -406,7 +392,6 @@ isert_free_device_ib_res(struct isert_device *device) { isert_info("device %p\n", device); - ib_dereg_mr(device->mr); ib_dealloc_pd(device->pd); isert_free_comps(device); } @@ -481,12 +466,10 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) list_for_each_entry_safe(fr_desc, tmp, &isert_conn->fr_pool, list) { list_del(&fr_desc->list); - ib_free_fast_reg_page_list(fr_desc->data_frpl); ib_dereg_mr(fr_desc->data_mr); if (fr_desc->pi_ctx) { - ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); ib_dereg_mr(fr_desc->pi_ctx->prot_mr); - ib_destroy_mr(fr_desc->pi_ctx->sig_mr); + ib_dereg_mr(fr_desc->pi_ctx->sig_mr); kfree(fr_desc->pi_ctx); } kfree(fr_desc); @@ -503,7 +486,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, struct ib_device *device, struct ib_pd *pd) { - struct ib_mr_init_attr mr_init_attr; struct pi_context *pi_ctx; int ret; @@ -513,28 +495,17 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, return -ENOMEM; } - pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(pi_ctx->prot_frpl)) { - isert_err("Failed to allocate prot frpl err=%ld\n", - PTR_ERR(pi_ctx->prot_frpl)); - ret = PTR_ERR(pi_ctx->prot_frpl); - goto err_pi_ctx; - } - - pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); + pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, + ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(pi_ctx->prot_mr)) { isert_err("Failed to allocate prot frmr err=%ld\n", PTR_ERR(pi_ctx->prot_mr)); ret = PTR_ERR(pi_ctx->prot_mr); - goto err_prot_frpl; + goto err_pi_ctx; } desc->ind |= ISERT_PROT_KEY_VALID; - memset(&mr_init_attr, 0, sizeof(mr_init_attr)); - mr_init_attr.max_reg_descriptors = 2; - mr_init_attr.flags |= IB_MR_SIGNATURE_EN; - pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); + pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2); if (IS_ERR(pi_ctx->sig_mr)) { isert_err("Failed to allocate signature enabled mr err=%ld\n", PTR_ERR(pi_ctx->sig_mr)); @@ -550,8 +521,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, err_prot_mr: ib_dereg_mr(pi_ctx->prot_mr); -err_prot_frpl: - ib_free_fast_reg_page_list(pi_ctx->prot_frpl); err_pi_ctx: kfree(pi_ctx); @@ -562,33 +531,18 @@ static int isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, struct fast_reg_descriptor *fr_desc) { - int ret; - - fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(fr_desc->data_frpl)) { - isert_err("Failed to allocate data frpl err=%ld\n", - PTR_ERR(fr_desc->data_frpl)); - return PTR_ERR(fr_desc->data_frpl); - } - - fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); + fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, + ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc->data_mr)) { isert_err("Failed to allocate data frmr err=%ld\n", PTR_ERR(fr_desc->data_mr)); - ret = PTR_ERR(fr_desc->data_mr); - goto err_data_frpl; + return PTR_ERR(fr_desc->data_mr); } fr_desc->ind |= ISERT_DATA_KEY_VALID; isert_dbg("Created fr_desc %p\n", fr_desc); return 0; - -err_data_frpl: - ib_free_fast_reg_page_list(fr_desc->data_frpl); - - return ret; } static int @@ -642,7 +596,7 @@ static void isert_init_conn(struct isert_conn *isert_conn) { isert_conn->state = ISER_CONN_INIT; - INIT_LIST_HEAD(&isert_conn->accept_node); + INIT_LIST_HEAD(&isert_conn->node); init_completion(&isert_conn->login_comp); init_completion(&isert_conn->login_req_comp); init_completion(&isert_conn->wait); @@ -775,12 +729,10 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) if (ret) goto out_conn_dev; - mutex_lock(&isert_np->np_accept_mutex); - list_add_tail(&isert_conn->accept_node, &isert_np->np_accept_list); - mutex_unlock(&isert_np->np_accept_mutex); + mutex_lock(&isert_np->mutex); + list_add_tail(&isert_conn->node, &isert_np->accepted); + mutex_unlock(&isert_np->mutex); - isert_info("np %p: Allow accept_np to continue\n", np); - up(&isert_np->np_sem); return 0; out_conn_dev: @@ -828,18 +780,21 @@ static void isert_connected_handler(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + struct isert_np *isert_np = cma_id->context; isert_info("conn %p\n", isert_conn); - if (!kref_get_unless_zero(&isert_conn->kref)) { - isert_warn("conn %p connect_release is running\n", isert_conn); - return; - } - mutex_lock(&isert_conn->mutex); - if (isert_conn->state != ISER_CONN_FULL_FEATURE) - isert_conn->state = ISER_CONN_UP; + isert_conn->state = ISER_CONN_UP; + kref_get(&isert_conn->kref); mutex_unlock(&isert_conn->mutex); + + mutex_lock(&isert_np->mutex); + list_move_tail(&isert_conn->node, &isert_np->pending); + mutex_unlock(&isert_np->mutex); + + isert_info("np %p: Allow accept_np to continue\n", isert_np); + up(&isert_np->sem); } static void @@ -900,18 +855,19 @@ static int isert_np_cma_handler(struct isert_np *isert_np, enum rdma_cm_event_type event) { - isert_dbg("isert np %p, handling event %d\n", isert_np, event); + isert_dbg("%s (%d): isert np %p\n", + rdma_event_msg(event), event, isert_np); switch (event) { case RDMA_CM_EVENT_DEVICE_REMOVAL: - isert_np->np_cm_id = NULL; + isert_np->cm_id = NULL; break; case RDMA_CM_EVENT_ADDR_CHANGE: - isert_np->np_cm_id = isert_setup_id(isert_np); - if (IS_ERR(isert_np->np_cm_id)) { + isert_np->cm_id = isert_setup_id(isert_np); + if (IS_ERR(isert_np->cm_id)) { isert_err("isert np %p setup id failed: %ld\n", - isert_np, PTR_ERR(isert_np->np_cm_id)); - isert_np->np_cm_id = NULL; + isert_np, PTR_ERR(isert_np->cm_id)); + isert_np->cm_id = NULL; } break; default: @@ -930,7 +886,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id, struct isert_conn *isert_conn; bool terminating = false; - if (isert_np->np_cm_id == cma_id) + if (isert_np->cm_id == cma_id) return isert_np_cma_handler(cma_id->context, event); isert_conn = cma_id->qp->qp_context; @@ -946,13 +902,13 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id, if (terminating) goto out; - mutex_lock(&isert_np->np_accept_mutex); - if (!list_empty(&isert_conn->accept_node)) { - list_del_init(&isert_conn->accept_node); + mutex_lock(&isert_np->mutex); + if (!list_empty(&isert_conn->node)) { + list_del_init(&isert_conn->node); isert_put_conn(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); } - mutex_unlock(&isert_np->np_accept_mutex); + mutex_unlock(&isert_np->mutex); out: return 0; @@ -963,6 +919,7 @@ isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + list_del_init(&isert_conn->node); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); @@ -974,7 +931,8 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; - isert_info("event %d status %d id %p np %p\n", event->event, + isert_info("%s (%d): status %d id %p np %p\n", + rdma_event_msg(event->event), event->event, event->status, cma_id, cma_id->context); switch (event->event) { @@ -1006,35 +964,51 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) } static int -isert_post_recv(struct isert_conn *isert_conn, u32 count) +isert_post_recvm(struct isert_conn *isert_conn, u32 count) { struct ib_recv_wr *rx_wr, *rx_wr_failed; int i, ret; - unsigned int rx_head = isert_conn->rx_desc_head; struct iser_rx_desc *rx_desc; for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { - rx_desc = &isert_conn->rx_descs[rx_head]; - rx_wr->wr_id = (uintptr_t)rx_desc; - rx_wr->sg_list = &rx_desc->rx_sg; - rx_wr->num_sge = 1; - rx_wr->next = rx_wr + 1; - rx_head = (rx_head + 1) & (ISERT_QP_MAX_RECV_DTOS - 1); + rx_desc = &isert_conn->rx_descs[i]; + rx_wr->wr_id = (uintptr_t)rx_desc; + rx_wr->sg_list = &rx_desc->rx_sg; + rx_wr->num_sge = 1; + rx_wr->next = rx_wr + 1; } - rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ isert_conn->post_recv_buf_count += count; ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, - &rx_wr_failed); + &rx_wr_failed); if (ret) { isert_err("ib_post_recv() failed with ret: %d\n", ret); isert_conn->post_recv_buf_count -= count; - } else { - isert_dbg("Posted %d RX buffers\n", count); - isert_conn->rx_desc_head = rx_head; } + + return ret; +} + +static int +isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) +{ + struct ib_recv_wr *rx_wr_failed, rx_wr; + int ret; + + rx_wr.wr_id = (uintptr_t)rx_desc; + rx_wr.sg_list = &rx_desc->rx_sg; + rx_wr.num_sge = 1; + rx_wr.next = NULL; + + isert_conn->post_recv_buf_count++; + ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed); + if (ret) { + isert_err("ib_post_recv() failed with ret: %d\n", ret); + isert_conn->post_recv_buf_count--; + } + return ret; } @@ -1079,8 +1053,8 @@ isert_create_send_desc(struct isert_conn *isert_conn, tx_desc->num_sge = 1; tx_desc->isert_cmd = isert_cmd; - if (tx_desc->tx_sg[0].lkey != device->mr->lkey) { - tx_desc->tx_sg[0].lkey = device->mr->lkey; + if (tx_desc->tx_sg[0].lkey != device->pd->local_dma_lkey) { + tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc); } } @@ -1103,7 +1077,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn, tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; - tx_desc->tx_sg[0].lkey = device->mr->lkey; + tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n", tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length, @@ -1136,7 +1110,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn) memset(&sge, 0, sizeof(struct ib_sge)); sge.addr = isert_conn->login_req_dma; sge.length = ISER_RX_LOGIN_SIZE; - sge.lkey = isert_conn->device->mr->lkey; + sge.lkey = isert_conn->device->pd->local_dma_lkey; isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n", sge.addr, sge.length, sge.lkey); @@ -1186,7 +1160,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, tx_dsg->addr = isert_conn->login_rsp_dma; tx_dsg->length = length; - tx_dsg->lkey = isert_conn->device->mr->lkey; + tx_dsg->lkey = isert_conn->device->pd->local_dma_lkey; tx_desc->num_sge = 2; } if (!login->login_failed) { @@ -1205,7 +1179,8 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, if (ret) return ret; - ret = isert_post_recv(isert_conn, ISERT_MIN_POSTED_RX); + ret = isert_post_recvm(isert_conn, + ISERT_QP_MAX_RECV_DTOS); if (ret) return ret; @@ -1278,7 +1253,7 @@ isert_rx_login_req(struct isert_conn *isert_conn) } static struct iscsi_cmd -*isert_allocate_cmd(struct iscsi_conn *conn) +*isert_allocate_cmd(struct iscsi_conn *conn, struct iser_rx_desc *rx_desc) { struct isert_conn *isert_conn = conn->context; struct isert_cmd *isert_cmd; @@ -1292,6 +1267,7 @@ static struct iscsi_cmd isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->conn = isert_conn; isert_cmd->iscsi_cmd = cmd; + isert_cmd->rx_desc = rx_desc; return cmd; } @@ -1303,9 +1279,9 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, { struct iscsi_conn *conn = isert_conn->conn; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf; - struct scatterlist *sg; int imm_data, imm_data_len, unsol_data, sg_nents, rc; bool dump_payload = false; + unsigned int data_len; rc = iscsit_setup_scsi_cmd(conn, cmd, buf); if (rc < 0) @@ -1314,7 +1290,10 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, imm_data = cmd->immediate_data; imm_data_len = cmd->first_burst_len; unsol_data = cmd->unsolicited_data; + data_len = cmd->se_cmd.data_length; + if (imm_data && imm_data_len == data_len) + cmd->se_cmd.se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; rc = iscsit_process_scsi_cmd(conn, cmd, hdr); if (rc < 0) { return 0; @@ -1326,13 +1305,20 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, if (!imm_data) return 0; - sg = &cmd->se_cmd.t_data_sg[0]; - sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE)); - - isert_dbg("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n", - sg, sg_nents, &rx_desc->data[0], imm_data_len); - - sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len); + if (imm_data_len != data_len) { + sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE)); + sg_copy_from_buffer(cmd->se_cmd.t_data_sg, sg_nents, + &rx_desc->data[0], imm_data_len); + isert_dbg("Copy Immediate sg_nents: %u imm_data_len: %d\n", + sg_nents, imm_data_len); + } else { + sg_init_table(&isert_cmd->sg, 1); + cmd->se_cmd.t_data_sg = &isert_cmd->sg; + cmd->se_cmd.t_data_nents = 1; + sg_set_buf(&isert_cmd->sg, &rx_desc->data[0], imm_data_len); + isert_dbg("Transfer Immediate imm_data_len: %d\n", + imm_data_len); + } cmd->write_data_done += imm_data_len; @@ -1349,7 +1335,7 @@ sequence_cmd: if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); else if (dump_payload && imm_data) - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1407,6 +1393,15 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, if (rc < 0) return rc; + /* + * multiple data-outs on the same command can arrive - + * so post the buffer before hand + */ + rc = isert_post_recv(isert_conn, rx_desc); + if (rc) { + isert_err("ib_post_recv failed with %d\n", rc); + return rc; + } return 0; } @@ -1479,7 +1474,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, switch (opcode) { case ISCSI_OP_SCSI_CMD: - cmd = isert_allocate_cmd(conn); + cmd = isert_allocate_cmd(conn, rx_desc); if (!cmd) break; @@ -1493,7 +1488,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_NOOP_OUT: - cmd = isert_allocate_cmd(conn); + cmd = isert_allocate_cmd(conn, rx_desc); if (!cmd) break; @@ -1506,7 +1501,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_TMFUNC: - cmd = isert_allocate_cmd(conn); + cmd = isert_allocate_cmd(conn, rx_desc); if (!cmd) break; @@ -1514,22 +1509,20 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_LOGOUT: - cmd = isert_allocate_cmd(conn); + cmd = isert_allocate_cmd(conn, rx_desc); if (!cmd) break; ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr); break; case ISCSI_OP_TEXT: - if (be32_to_cpu(hdr->ttt) != 0xFFFFFFFF) { + if (be32_to_cpu(hdr->ttt) != 0xFFFFFFFF) cmd = iscsit_find_cmd_from_itt(conn, hdr->itt); - if (!cmd) - break; - } else { - cmd = isert_allocate_cmd(conn); - if (!cmd) - break; - } + else + cmd = isert_allocate_cmd(conn, rx_desc); + + if (!cmd) + break; isert_cmd = iscsit_priv_cmd(cmd); ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd, @@ -1550,7 +1543,6 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) struct iser_hdr *iser_hdr = &rx_desc->iser_header; uint64_t read_va = 0, write_va = 0; uint32_t read_stag = 0, write_stag = 0; - int rc; switch (iser_hdr->flags & 0xF0) { case ISCSI_CTRL: @@ -1577,8 +1569,8 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) break; } - rc = isert_rx_opcode(isert_conn, rx_desc, - read_stag, read_va, write_stag, write_va); + isert_rx_opcode(isert_conn, rx_desc, + read_stag, read_va, write_stag, write_va); } static void @@ -1589,7 +1581,7 @@ isert_rcv_completion(struct iser_rx_desc *desc, struct ib_device *ib_dev = isert_conn->cm_id->device; struct iscsi_hdr *hdr; u64 rx_dma; - int rx_buflen, outstanding; + int rx_buflen; if ((char *)desc == isert_conn->login_req_buf) { rx_dma = isert_conn->login_req_dma; @@ -1629,22 +1621,6 @@ isert_rcv_completion(struct iser_rx_desc *desc, DMA_FROM_DEVICE); isert_conn->post_recv_buf_count--; - isert_dbg("Decremented post_recv_buf_count: %d\n", - isert_conn->post_recv_buf_count); - - if ((char *)desc == isert_conn->login_req_buf) - return; - - outstanding = isert_conn->post_recv_buf_count; - if (outstanding + ISERT_MIN_POSTED_RX <= ISERT_QP_MAX_RECV_DTOS) { - int err, count = min(ISERT_QP_MAX_RECV_DTOS - outstanding, - ISERT_MIN_POSTED_RX); - err = isert_post_recv(isert_conn, count); - if (err) { - isert_err("isert_post_recv() count: %d failed, %d\n", - count, err); - } - } } static int @@ -1703,10 +1679,10 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) isert_unmap_data_buf(isert_conn, &wr->data); } - if (wr->send_wr) { + if (wr->rdma_wr) { isert_dbg("Cmd %p free send_wr\n", isert_cmd); - kfree(wr->send_wr); - wr->send_wr = NULL; + kfree(wr->rdma_wr); + wr->rdma_wr = NULL; } if (wr->ib_sge) { @@ -1741,7 +1717,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) } wr->ib_sge = NULL; - wr->send_wr = NULL; + wr->rdma_wr = NULL; } static void @@ -1774,7 +1750,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { struct se_cmd *se_cmd = &cmd->se_cmd; - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } } @@ -1910,7 +1886,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc, } device->unreg_rdma_mem(isert_cmd, isert_conn); - wr->send_wr_num = 0; + wr->rdma_wr_num = 0; if (ret) transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); @@ -1938,7 +1914,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, iscsit_stop_dataout_timer(cmd); device->unreg_rdma_mem(isert_cmd, isert_conn); cmd->write_data_done = wr->data.len; - wr->send_wr_num = 0; + wr->rdma_wr_num = 0; isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); @@ -1947,7 +1923,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, spin_unlock_bh(&cmd->istate_lock); if (ret) { - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); } else { @@ -2108,10 +2084,13 @@ isert_handle_wc(struct ib_wc *wc) } } else { if (wc->status != IB_WC_WR_FLUSH_ERR) - isert_err("wr id %llx status %d vend_err %x\n", - wc->wr_id, wc->status, wc->vendor_err); + isert_err("%s (%d): wr id %llx vend_err %x\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id, wc->vendor_err); else - isert_dbg("flush error: wr id %llx\n", wc->wr_id); + isert_dbg("%s (%d): wr id %llx\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id); if (wc->wr_id != ISER_FASTREG_LI_WRID) isert_cq_comp_err(isert_conn, wc); @@ -2153,6 +2132,12 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) struct ib_send_wr *wr_failed; int ret; + ret = isert_post_recv(isert_conn, isert_cmd->rx_desc); + if (ret) { + isert_err("ib_post_recv failed with %d\n", ret); + return ret; + } + ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, &wr_failed); if (ret) { @@ -2200,7 +2185,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->pdu_buf_len = pdu_len; tx_dsg->addr = isert_cmd->pdu_buf_dma; tx_dsg->length = pdu_len; - tx_dsg->lkey = device->mr->lkey; + tx_dsg->lkey = device->pd->local_dma_lkey; isert_cmd->tx_desc.num_sge = 2; } @@ -2328,7 +2313,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) isert_cmd->pdu_buf_len = ISCSI_HDR_LEN; tx_dsg->addr = isert_cmd->pdu_buf_dma; tx_dsg->length = ISCSI_HDR_LEN; - tx_dsg->lkey = device->mr->lkey; + tx_dsg->lkey = device->pd->local_dma_lkey; isert_cmd->tx_desc.num_sge = 2; isert_init_send_wr(isert_conn, isert_cmd, send_wr); @@ -2369,7 +2354,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) isert_cmd->pdu_buf_len = txt_rsp_len; tx_dsg->addr = isert_cmd->pdu_buf_dma; tx_dsg->length = txt_rsp_len; - tx_dsg->lkey = device->mr->lkey; + tx_dsg->lkey = device->pd->local_dma_lkey; isert_cmd->tx_desc.num_sge = 2; } isert_init_send_wr(isert_conn, isert_cmd, send_wr); @@ -2381,7 +2366,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct ib_sge *ib_sge, struct ib_send_wr *send_wr, + struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr, u32 data_left, u32 offset) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; @@ -2396,8 +2381,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); page_off = offset % PAGE_SIZE; - send_wr->sg_list = ib_sge; - send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; + rdma_wr->wr.sg_list = ib_sge; + rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc; /* * Perform mapping of TCM scatterlist memory ib_sge dma_addr. */ @@ -2410,7 +2395,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off; ib_sge->length = min_t(u32, data_left, ib_sg_dma_len(ib_dev, tmp_sg) - page_off); - ib_sge->lkey = device->mr->lkey; + ib_sge->lkey = device->pd->local_dma_lkey; isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n", ib_sge->addr, ib_sge->length, ib_sge->lkey); @@ -2422,11 +2407,11 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); } - send_wr->num_sge = ++i; + rdma_wr->wr.num_sge = ++i; isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", - send_wr->sg_list, send_wr->num_sge); + rdma_wr->wr.sg_list, rdma_wr->wr.num_sge); - return send_wr->num_sge; + return rdma_wr->wr.num_sge; } static int @@ -2437,7 +2422,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; struct isert_data_buf *data = &wr->data; - struct ib_send_wr *send_wr; + struct ib_rdma_wr *rdma_wr; struct ib_sge *ib_sge; u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; int ret = 0, i, ib_sge_cnt; @@ -2462,11 +2447,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } wr->ib_sge = ib_sge; - wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); - wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, + wr->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); + wr->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * wr->rdma_wr_num, GFP_KERNEL); - if (!wr->send_wr) { - isert_dbg("Unable to allocate wr->send_wr\n"); + if (!wr->rdma_wr) { + isert_dbg("Unable to allocate wr->rdma_wr\n"); ret = -ENOMEM; goto unmap_cmd; } @@ -2474,31 +2459,31 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, wr->isert_cmd = isert_cmd; rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - for (i = 0; i < wr->send_wr_num; i++) { - send_wr = &isert_cmd->rdma_wr.send_wr[i]; + for (i = 0; i < wr->rdma_wr_num; i++) { + rdma_wr = &isert_cmd->rdma_wr.rdma_wr[i]; data_len = min(data_left, rdma_write_max); - send_wr->send_flags = 0; + rdma_wr->wr.send_flags = 0; if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { - send_wr->opcode = IB_WR_RDMA_WRITE; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; - if (i + 1 == wr->send_wr_num) - send_wr->next = &isert_cmd->tx_desc.send_wr; + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; + rdma_wr->remote_addr = isert_cmd->read_va + offset; + rdma_wr->rkey = isert_cmd->read_stag; + if (i + 1 == wr->rdma_wr_num) + rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr; else - send_wr->next = &wr->send_wr[i + 1]; + rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr; } else { - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; - if (i + 1 == wr->send_wr_num) - send_wr->send_flags = IB_SEND_SIGNALED; + rdma_wr->wr.opcode = IB_WR_RDMA_READ; + rdma_wr->remote_addr = isert_cmd->write_va + va_offset; + rdma_wr->rkey = isert_cmd->write_stag; + if (i + 1 == wr->rdma_wr_num) + rdma_wr->wr.send_flags = IB_SEND_SIGNALED; else - send_wr->next = &wr->send_wr[i + 1]; + rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr; } ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, - send_wr, data_len, offset); + rdma_wr, data_len, offset); ib_sge += ib_sge_cnt; offset += data_len; @@ -2513,45 +2498,6 @@ unmap_cmd: return ret; } -static int -isert_map_fr_pagelist(struct ib_device *ib_dev, - struct scatterlist *sg_start, int sg_nents, u64 *fr_pl) -{ - u64 start_addr, end_addr, page, chunk_start = 0; - struct scatterlist *tmp_sg; - int i = 0, new_chunk, last_ent, n_pages; - - n_pages = 0; - new_chunk = 1; - last_ent = sg_nents - 1; - for_each_sg(sg_start, tmp_sg, sg_nents, i) { - start_addr = ib_sg_dma_address(ib_dev, tmp_sg); - if (new_chunk) - chunk_start = start_addr; - end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg); - - isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n", - i, (unsigned long long)tmp_sg->dma_address, - tmp_sg->length); - - if ((end_addr & ~PAGE_MASK) && i < last_ent) { - new_chunk = 0; - continue; - } - new_chunk = 1; - - page = chunk_start & PAGE_MASK; - do { - fr_pl[n_pages++] = page; - isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n", - n_pages - 1, page); - page += PAGE_SIZE; - } while (page < end_addr); - } - - return n_pages; -} - static inline void isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) { @@ -2577,14 +2523,12 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, struct isert_device *device = isert_conn->device; struct ib_device *ib_dev = device->ib_device; struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; - struct ib_send_wr fr_wr, inv_wr; - struct ib_send_wr *bad_wr, *wr = NULL; - int ret, pagelist_len; - u32 page_off; + struct ib_reg_wr reg_wr; + struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; + int ret, n; if (mem->dma_nents == 1) { - sge->lkey = device->mr->lkey; + sge->lkey = device->pd->local_dma_lkey; sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]); sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]); isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", @@ -2592,45 +2536,41 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, return 0; } - if (ind == ISERT_DATA_KEY_VALID) { + if (ind == ISERT_DATA_KEY_VALID) /* Registering data buffer */ mr = fr_desc->data_mr; - frpl = fr_desc->data_frpl; - } else { + else /* Registering protection buffer */ mr = fr_desc->pi_ctx->prot_mr; - frpl = fr_desc->pi_ctx->prot_frpl; - } - - page_off = mem->offset % PAGE_SIZE; - - isert_dbg("Use fr_desc %p sg_nents %d offset %u\n", - fr_desc, mem->nents, mem->offset); - - pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents, - &frpl->page_list[0]); if (!(fr_desc->ind & ind)) { isert_inv_rkey(&inv_wr, mr); wr = &inv_wr; } - /* Prepare FASTREG WR */ - memset(&fr_wr, 0, sizeof(fr_wr)); - fr_wr.wr_id = ISER_FASTREG_LI_WRID; - fr_wr.opcode = IB_WR_FAST_REG_MR; - fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off; - fr_wr.wr.fast_reg.page_list = frpl; - fr_wr.wr.fast_reg.page_list_len = pagelist_len; - fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fr_wr.wr.fast_reg.length = mem->len; - fr_wr.wr.fast_reg.rkey = mr->rkey; - fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; + n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE); + if (unlikely(n != mem->nents)) { + isert_err("failed to map mr sg (%d/%d)\n", + n, mem->nents); + return n < 0 ? n : -EINVAL; + } + + isert_dbg("Use fr_desc %p sg_nents %d offset %u\n", + fr_desc, mem->nents, mem->offset); + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = ISER_FASTREG_LI_WRID; + reg_wr.wr.send_flags = 0; + reg_wr.wr.num_sge = 0; + reg_wr.mr = mr; + reg_wr.key = mr->lkey; + reg_wr.access = IB_ACCESS_LOCAL_WRITE; if (!wr) - wr = &fr_wr; + wr = ®_wr.wr; else - wr->next = &fr_wr; + wr->next = ®_wr.wr; ret = ib_post_send(isert_conn->qp, wr, &bad_wr); if (ret) { @@ -2640,8 +2580,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, fr_desc->ind &= ~ind; sge->lkey = mr->lkey; - sge->addr = frpl->page_list[0] + page_off; - sge->length = mem->len; + sge->addr = mr->iova; + sge->length = mr->length; isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", sge->addr, sge->length, sge->lkey); @@ -2711,8 +2651,8 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, struct isert_rdma_wr *rdma_wr, struct fast_reg_descriptor *fr_desc) { - struct ib_send_wr sig_wr, inv_wr; - struct ib_send_wr *bad_wr, *wr = NULL; + struct ib_sig_handover_wr sig_wr; + struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; struct pi_context *pi_ctx = fr_desc->pi_ctx; struct ib_sig_attrs sig_attrs; int ret; @@ -2730,20 +2670,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, } memset(&sig_wr, 0, sizeof(sig_wr)); - sig_wr.opcode = IB_WR_REG_SIG_MR; - sig_wr.wr_id = ISER_FASTREG_LI_WRID; - sig_wr.sg_list = &rdma_wr->ib_sg[DATA]; - sig_wr.num_sge = 1; - sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE; - sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; - sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; + sig_wr.wr.opcode = IB_WR_REG_SIG_MR; + sig_wr.wr.wr_id = ISER_FASTREG_LI_WRID; + sig_wr.wr.sg_list = &rdma_wr->ib_sg[DATA]; + sig_wr.wr.num_sge = 1; + sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; + sig_wr.sig_attrs = &sig_attrs; + sig_wr.sig_mr = pi_ctx->sig_mr; if (se_cmd->t_prot_sg) - sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT]; + sig_wr.prot = &rdma_wr->ib_sg[PROT]; if (!wr) - wr = &sig_wr; + wr = &sig_wr.wr; else - wr->next = &sig_wr; + wr->next = &sig_wr.wr; ret = ib_post_send(isert_conn->qp, wr, &bad_wr); if (ret) { @@ -2837,7 +2777,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; struct fast_reg_descriptor *fr_desc = NULL; - struct ib_send_wr *send_wr; + struct ib_rdma_wr *rdma_wr; struct ib_sge *ib_sg; u32 offset; int ret = 0; @@ -2878,26 +2818,26 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg)); wr->ib_sge = &wr->s_ib_sge; - wr->send_wr_num = 1; - memset(&wr->s_send_wr, 0, sizeof(*send_wr)); - wr->send_wr = &wr->s_send_wr; + wr->rdma_wr_num = 1; + memset(&wr->s_rdma_wr, 0, sizeof(wr->s_rdma_wr)); + wr->rdma_wr = &wr->s_rdma_wr; wr->isert_cmd = isert_cmd; - send_wr = &isert_cmd->rdma_wr.s_send_wr; - send_wr->sg_list = &wr->s_ib_sge; - send_wr->num_sge = 1; - send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; + rdma_wr = &isert_cmd->rdma_wr.s_rdma_wr; + rdma_wr->wr.sg_list = &wr->s_ib_sge; + rdma_wr->wr.num_sge = 1; + rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc; if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { - send_wr->opcode = IB_WR_RDMA_WRITE; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; - send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; + rdma_wr->remote_addr = isert_cmd->read_va; + rdma_wr->rkey = isert_cmd->read_stag; + rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? 0 : IB_SEND_SIGNALED; } else { - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; - send_wr->send_flags = IB_SEND_SIGNALED; + rdma_wr->wr.opcode = IB_WR_RDMA_READ; + rdma_wr->remote_addr = isert_cmd->write_va; + rdma_wr->rkey = isert_cmd->write_stag; + rdma_wr->wr.send_flags = IB_SEND_SIGNALED; } return 0; @@ -2945,11 +2885,17 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); isert_init_send_wr(isert_conn, isert_cmd, &isert_cmd->tx_desc.send_wr); - isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; - wr->send_wr_num += 1; + isert_cmd->rdma_wr.s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr; + wr->rdma_wr_num += 1; + + rc = isert_post_recv(isert_conn, isert_cmd->rx_desc); + if (rc) { + isert_err("ib_post_recv failed with %d\n", rc); + return rc; + } } - rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed); + rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed); if (rc) isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); @@ -2983,7 +2929,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) return rc; } - rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed); + rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed); if (rc) isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); @@ -2996,9 +2942,16 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) static int isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) { - int ret; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + int ret = 0; switch (state) { + case ISTATE_REMOVE: + spin_lock_bh(&conn->cmd_lock); + list_del_init(&cmd->i_conn_node); + spin_unlock_bh(&conn->cmd_lock); + isert_put_cmd(isert_cmd, true); + break; case ISTATE_SEND_NOPIN_WANT_RESPONSE: ret = isert_put_nopin(cmd, conn, false); break; @@ -3062,7 +3015,7 @@ isert_setup_id(struct isert_np *isert_np) sa = (struct sockaddr *)&np->np_sockaddr; isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); - id = rdma_create_id(isert_cma_handler, isert_np, + id = rdma_create_id(&init_net, isert_cma_handler, isert_np, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); @@ -3092,7 +3045,7 @@ out: static int isert_setup_np(struct iscsi_np *np, - struct __kernel_sockaddr_storage *ksockaddr) + struct sockaddr_storage *ksockaddr) { struct isert_np *isert_np; struct rdma_cm_id *isert_lid; @@ -3103,10 +3056,10 @@ isert_setup_np(struct iscsi_np *np, isert_err("Unable to allocate struct isert_np\n"); return -ENOMEM; } - sema_init(&isert_np->np_sem, 0); - mutex_init(&isert_np->np_accept_mutex); - INIT_LIST_HEAD(&isert_np->np_accept_list); - init_completion(&isert_np->np_login_comp); + sema_init(&isert_np->sem, 0); + mutex_init(&isert_np->mutex); + INIT_LIST_HEAD(&isert_np->accepted); + INIT_LIST_HEAD(&isert_np->pending); isert_np->np = np; /* @@ -3114,7 +3067,7 @@ isert_setup_np(struct iscsi_np *np, * in iscsi_target_configfs.c code.. */ memcpy(&np->np_sockaddr, ksockaddr, - sizeof(struct __kernel_sockaddr_storage)); + sizeof(struct sockaddr_storage)); isert_lid = isert_setup_id(isert_np); if (IS_ERR(isert_lid)) { @@ -3122,7 +3075,7 @@ isert_setup_np(struct iscsi_np *np, goto out; } - isert_np->np_cm_id = isert_lid; + isert_np->cm_id = isert_lid; np->np_context = isert_np; return 0; @@ -3196,32 +3149,11 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, { struct rdma_cm_id *cm_id = isert_conn->cm_id; struct rdma_route *cm_route = &cm_id->route; - struct sockaddr_in *sock_in; - struct sockaddr_in6 *sock_in6; conn->login_family = np->np_sockaddr.ss_family; - if (np->np_sockaddr.ss_family == AF_INET6) { - sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.dst_addr; - snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", - &sock_in6->sin6_addr.in6_u); - conn->login_port = ntohs(sock_in6->sin6_port); - - sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.src_addr; - snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c", - &sock_in6->sin6_addr.in6_u); - conn->local_port = ntohs(sock_in6->sin6_port); - } else { - sock_in = (struct sockaddr_in *)&cm_route->addr.dst_addr; - sprintf(conn->login_ip, "%pI4", - &sock_in->sin_addr.s_addr); - conn->login_port = ntohs(sock_in->sin_port); - - sock_in = (struct sockaddr_in *)&cm_route->addr.src_addr; - sprintf(conn->local_ip, "%pI4", - &sock_in->sin_addr.s_addr); - conn->local_port = ntohs(sock_in->sin_port); - } + conn->login_sockaddr = cm_route->addr.dst_addr; + conn->local_sockaddr = cm_route->addr.src_addr; } static int @@ -3232,7 +3164,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) int ret; accept_wait: - ret = down_interruptible(&isert_np->np_sem); + ret = down_interruptible(&isert_np->sem); if (ret) return -ENODEV; @@ -3249,15 +3181,15 @@ accept_wait: } spin_unlock_bh(&np->np_thread_lock); - mutex_lock(&isert_np->np_accept_mutex); - if (list_empty(&isert_np->np_accept_list)) { - mutex_unlock(&isert_np->np_accept_mutex); + mutex_lock(&isert_np->mutex); + if (list_empty(&isert_np->pending)) { + mutex_unlock(&isert_np->mutex); goto accept_wait; } - isert_conn = list_first_entry(&isert_np->np_accept_list, - struct isert_conn, accept_node); - list_del_init(&isert_conn->accept_node); - mutex_unlock(&isert_np->np_accept_mutex); + isert_conn = list_first_entry(&isert_np->pending, + struct isert_conn, node); + list_del_init(&isert_conn->node); + mutex_unlock(&isert_np->mutex); conn->context = isert_conn; isert_conn->conn = conn; @@ -3275,28 +3207,39 @@ isert_free_np(struct iscsi_np *np) struct isert_np *isert_np = np->np_context; struct isert_conn *isert_conn, *n; - if (isert_np->np_cm_id) - rdma_destroy_id(isert_np->np_cm_id); + if (isert_np->cm_id) + rdma_destroy_id(isert_np->cm_id); /* * FIXME: At this point we don't have a good way to insure * that at this point we don't have hanging connections that * completed RDMA establishment but didn't start iscsi login * process. So work-around this by cleaning up what ever piled - * up in np_accept_list. + * up in accepted and pending lists. */ - mutex_lock(&isert_np->np_accept_mutex); - if (!list_empty(&isert_np->np_accept_list)) { - isert_info("Still have isert connections, cleaning up...\n"); + mutex_lock(&isert_np->mutex); + if (!list_empty(&isert_np->pending)) { + isert_info("Still have isert pending connections\n"); list_for_each_entry_safe(isert_conn, n, - &isert_np->np_accept_list, - accept_node) { + &isert_np->pending, + node) { isert_info("cleaning isert_conn %p state (%d)\n", isert_conn, isert_conn->state); isert_connect_release(isert_conn); } } - mutex_unlock(&isert_np->np_accept_mutex); + + if (!list_empty(&isert_np->accepted)) { + isert_info("Still have isert accepted connections\n"); + list_for_each_entry_safe(isert_conn, n, + &isert_np->accepted, + node) { + isert_info("cleaning isert_conn %p state (%d)\n", + isert_conn, isert_conn->state); + isert_connect_release(isert_conn); + } + } + mutex_unlock(&isert_np->mutex); np->np_context = NULL; kfree(isert_np); @@ -3363,6 +3306,41 @@ isert_wait4flush(struct isert_conn *isert_conn) wait_for_completion(&isert_conn->wait_comp_err); } +/** + * isert_put_unsol_pending_cmds() - Drop commands waiting for + * unsolicitate dataout + * @conn: iscsi connection + * + * We might still have commands that are waiting for unsolicited + * dataouts messages. We must put the extra reference on those + * before blocking on the target_wait_for_session_cmds + */ +static void +isert_put_unsol_pending_cmds(struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd, *tmp; + static LIST_HEAD(drop_cmd_list); + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, tmp, &conn->conn_cmd_list, i_conn_node) { + if ((cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA) && + (cmd->write_data_done < conn->sess->sess_ops->FirstBurstLength) && + (cmd->write_data_done < cmd->se_cmd.data_length)) + list_move_tail(&cmd->i_conn_node, &drop_cmd_list); + } + spin_unlock_bh(&conn->cmd_lock); + + list_for_each_entry_safe(cmd, tmp, &drop_cmd_list, i_conn_node) { + list_del_init(&cmd->i_conn_node); + if (cmd->i_state != ISTATE_REMOVE) { + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + + isert_info("conn %p dropping cmd %p\n", conn, cmd); + isert_put_cmd(isert_cmd, true); + } + } +} + static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -3381,8 +3359,9 @@ static void isert_wait_conn(struct iscsi_conn *conn) isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->mutex); - isert_wait4cmds(conn); isert_wait4flush(isert_conn); + isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/kernel/drivers/infiniband/ulp/isert/ib_isert.h b/kernel/drivers/infiniband/ulp/isert/ib_isert.h index 9ec23a786..3d7fbc47c 100644 --- a/kernel/drivers/infiniband/ulp/isert/ib_isert.h +++ b/kernel/drivers/infiniband/ulp/isert/ib_isert.h @@ -84,14 +84,12 @@ enum isert_indicator { struct pi_context { struct ib_mr *prot_mr; - struct ib_fast_reg_page_list *prot_frpl; struct ib_mr *sig_mr; }; struct fast_reg_descriptor { struct list_head list; struct ib_mr *data_mr; - struct ib_fast_reg_page_list *data_frpl; u8 ind; struct pi_context *pi_ctx; }; @@ -113,14 +111,13 @@ enum { }; struct isert_rdma_wr { - struct list_head wr_list; struct isert_cmd *isert_cmd; enum iser_ib_op_code iser_ib_op; struct ib_sge *ib_sge; struct ib_sge s_ib_sge; - int send_wr_num; - struct ib_send_wr *send_wr; - struct ib_send_wr s_send_wr; + int rdma_wr_num; + struct ib_rdma_wr *rdma_wr; + struct ib_rdma_wr s_rdma_wr; struct ib_sge ib_sg[3]; struct isert_data_buf data; struct isert_data_buf prot; @@ -134,14 +131,13 @@ struct isert_cmd { uint64_t write_va; u64 pdu_buf_dma; u32 pdu_buf_len; - u32 read_va_off; - u32 write_va_off; - u32 rdma_wr_num; struct isert_conn *conn; struct iscsi_cmd *iscsi_cmd; struct iser_tx_desc tx_desc; + struct iser_rx_desc *rx_desc; struct isert_rdma_wr rdma_wr; struct work_struct comp_work; + struct scatterlist sg; }; struct isert_device; @@ -159,11 +155,10 @@ struct isert_conn { u64 login_req_dma; int login_req_len; u64 login_rsp_dma; - unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; - struct ib_recv_wr rx_wr[ISERT_MIN_POSTED_RX]; + struct ib_recv_wr rx_wr[ISERT_QP_MAX_RECV_DTOS]; struct iscsi_conn *conn; - struct list_head accept_node; + struct list_head node; struct completion login_comp; struct completion login_req_comp; struct iser_tx_desc login_tx_desc; @@ -209,7 +204,6 @@ struct isert_device { int refcount; struct ib_device *ib_device; struct ib_pd *pd; - struct ib_mr *mr; struct isert_comp *comps; int comps_used; struct list_head dev_node; @@ -223,9 +217,9 @@ struct isert_device { struct isert_np { struct iscsi_np *np; - struct semaphore np_sem; - struct rdma_cm_id *np_cm_id; - struct mutex np_accept_mutex; - struct list_head np_accept_list; - struct completion np_login_comp; + struct semaphore sem; + struct rdma_cm_id *cm_id; + struct mutex mutex; + struct list_head accepted; + struct list_head pending; }; diff --git a/kernel/drivers/infiniband/ulp/srp/ib_srp.c b/kernel/drivers/infiniband/ulp/srp/ib_srp.c index 025f93105..3db9a6597 100644 --- a/kernel/drivers/infiniband/ulp/srp/ib_srp.c +++ b/kernel/drivers/infiniband/ulp/srp/ib_srp.c @@ -55,20 +55,21 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "1.0" -#define DRV_RELDATE "July 1, 2013" +#define DRV_VERSION "2.0" +#define DRV_RELDATE "July 26, 2015" MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " - "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); +MODULE_INFO(release_date, DRV_RELDATE); static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; static unsigned int indirect_sg_entries; static bool allow_ext_sg; -static bool prefer_fr; -static bool register_always; +static bool prefer_fr = true; +static bool register_always = true; static int topspin_workarounds = 1; module_param(srp_sg_tablesize, uint, 0444); @@ -98,7 +99,7 @@ module_param(register_always, bool, 0444); MODULE_PARM_DESC(register_always, "Use memory registration even for contiguous memory regions"); -static struct kernel_param_ops srp_tmo_ops; +static const struct kernel_param_ops srp_tmo_ops; static int srp_reconnect_delay = 10; module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, @@ -130,7 +131,7 @@ MODULE_PARM_DESC(ch_count, "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); static void srp_add_one(struct ib_device *device); -static void srp_remove_one(struct ib_device *device); +static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr); static void srp_send_completion(struct ib_cq *cq, void *ch_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); @@ -160,13 +161,10 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp) { int tmo, res; - if (strncmp(val, "off", 3) != 0) { - res = kstrtoint(val, 0, &tmo); - if (res) - goto out; - } else { - tmo = -1; - } + res = srp_parse_tmo(&tmo, val); + if (res) + goto out; + if (kp->arg == &srp_reconnect_delay) res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, srp_dev_loss_tmo); @@ -183,7 +181,7 @@ out: return res; } -static struct kernel_param_ops srp_tmo_ops = { +static const struct kernel_param_ops srp_tmo_ops = { .get = srp_tmo_get, .set = srp_tmo_set, }; @@ -253,7 +251,8 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) static void srp_qp_event(struct ib_event *event, void *context) { - pr_debug("QP event %d\n", event->event); + pr_debug("QP event %s (%d)\n", + ib_event_msg(event->event), event->event); } static int srp_init_qp(struct srp_target_port *target, @@ -341,8 +340,6 @@ static void srp_destroy_fr_pool(struct srp_fr_pool *pool) return; for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - if (d->frpl) - ib_free_fast_reg_page_list(d->frpl); if (d->mr) ib_dereg_mr(d->mr); } @@ -363,7 +360,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_pool *pool; struct srp_fr_desc *d; struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; int i, ret = -EINVAL; if (pool_size <= 0) @@ -379,18 +375,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, INIT_LIST_HEAD(&pool->free_list); for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); + mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, + max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto destroy_pool; } d->mr = mr; - frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); - if (IS_ERR(frpl)) { - ret = PTR_ERR(frpl); - goto destroy_pool; - } - d->frpl = frpl; list_add_tail(&d->entry, &pool->free_list); } @@ -497,7 +488,8 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) struct ib_qp *qp; struct ib_fmr_pool *fmr_pool = NULL; struct srp_fr_pool *fr_pool = NULL; - const int m = 1 + dev->use_fast_reg; + const int m = dev->use_fast_reg ? 3 : 1; + struct ib_cq_init_attr cq_attr = {}; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); @@ -505,15 +497,19 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return -ENOMEM; /* + 1 for SRP_LAST_WR_ID */ + cq_attr.cqe = target->queue_size + 1; + cq_attr.comp_vector = ch->comp_vector; recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch, - target->queue_size + 1, ch->comp_vector); + &cq_attr); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } + cq_attr.cqe = m * target->queue_size; + cq_attr.comp_vector = ch->comp_vector; send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch, - m * target->queue_size, ch->comp_vector); + &cq_attr); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -541,7 +537,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) if (ret) goto err_qp; - if (dev->use_fast_reg && dev->has_fr) { + if (dev->use_fast_reg) { fr_pool = srp_alloc_fr_pool(target); if (IS_ERR(fr_pool)) { ret = PTR_ERR(fr_pool); @@ -549,10 +545,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) "FR pool allocation failed (%d)\n", ret); goto err_qp; } - if (ch->fr_pool) - srp_destroy_fr_pool(ch->fr_pool); - ch->fr_pool = fr_pool; - } else if (!dev->use_fast_reg && dev->has_fmr) { + } else if (dev->use_fmr) { fmr_pool = srp_alloc_fmr_pool(target); if (IS_ERR(fmr_pool)) { ret = PTR_ERR(fmr_pool); @@ -560,9 +553,6 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) "FMR pool allocation failed (%d)\n", ret); goto err_qp; } - if (ch->fmr_pool) - ib_destroy_fmr_pool(ch->fmr_pool); - ch->fmr_pool = fmr_pool; } if (ch->qp) @@ -576,6 +566,16 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) ch->recv_cq = recv_cq; ch->send_cq = send_cq; + if (dev->use_fast_reg) { + if (ch->fr_pool) + srp_destroy_fr_pool(ch->fr_pool); + ch->fr_pool = fr_pool; + } else if (dev->use_fmr) { + if (ch->fmr_pool) + ib_destroy_fmr_pool(ch->fmr_pool); + ch->fmr_pool = fmr_pool; + } + kfree(init_attr); return 0; @@ -618,7 +618,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, if (dev->use_fast_reg) { if (ch->fr_pool) srp_destroy_fr_pool(ch->fr_pool); - } else { + } else if (dev->use_fmr) { if (ch->fmr_pool) ib_destroy_fmr_pool(ch->fmr_pool); } @@ -780,7 +780,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) shost_printk(KERN_DEBUG, target->scsi_host, PFX "Topspin/Cisco initiator port ID workaround " "activated for target GUID %016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); memcpy(req->priv.initiator_port_id + 8, &target->srp_host->srp_dev->dev->node_guid, 8); @@ -835,16 +835,17 @@ static void srp_free_req_data(struct srp_target_port *target, struct srp_request *req; int i; - if (!ch->target || !ch->req_ring) + if (!ch->req_ring) return; for (i = 0; i < target->req_ring_size; ++i) { req = &ch->req_ring[i]; - if (dev->use_fast_reg) + if (dev->use_fast_reg) { kfree(req->fr_list); - else + } else { kfree(req->fmr_list); - kfree(req->map_page); + kfree(req->map_page); + } if (req->indirect_dma_addr) { ib_dma_unmap_single(ibdev, req->indirect_dma_addr, target->indirect_size, @@ -878,14 +879,15 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch) GFP_KERNEL); if (!mr_list) goto out; - if (srp_dev->use_fast_reg) + if (srp_dev->use_fast_reg) { req->fr_list = mr_list; - else + } else { req->fmr_list = mr_list; - req->map_page = kmalloc(srp_dev->max_pages_per_mr * - sizeof(void *), GFP_KERNEL); - if (!req->map_page) - goto out; + req->map_page = kmalloc(srp_dev->max_pages_per_mr * + sizeof(void *), GFP_KERNEL); + if (!req->map_page) + goto out; + } req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); if (!req->indirect_desc) goto out; @@ -992,16 +994,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) ret = srp_lookup_path(ch); if (ret) - return ret; + goto out; while (1) { init_completion(&ch->done); ret = srp_send_req(ch, multich); if (ret) - return ret; + goto out; ret = wait_for_completion_interruptible(&ch->done); if (ret < 0) - return ret; + goto out; /* * The CM event handling code will set status to @@ -1009,15 +1011,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) * back, or SRP_DLID_REDIRECT if we get a lid/qp * redirect REJ back. */ - switch (ch->status) { + ret = ch->status; + switch (ret) { case 0: ch->connected = true; - return 0; + goto out; case SRP_PORT_REDIRECT: ret = srp_lookup_path(ch); if (ret) - return ret; + goto out; break; case SRP_DLID_REDIRECT: @@ -1026,13 +1029,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) case SRP_STALE_CONN: shost_printk(KERN_ERR, target->scsi_host, PFX "giving up on stale connection\n"); - ch->status = -ECONNRESET; - return ch->status; + ret = -ECONNRESET; + goto out; default: - return ch->status; + goto out; } } + +out: + return ret <= 0 ? ret : -ENODEV; } static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey) @@ -1080,7 +1086,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, if (req->nmdesc) srp_fr_pool_put(ch->fr_pool, req->fr_list, req->nmdesc); - } else { + } else if (dev->use_fmr) { struct ib_pool_fmr **pfmr; for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) @@ -1209,14 +1215,10 @@ static int srp_rport_reconnect(struct srp_rport *rport) */ for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; ret += srp_new_cm_id(ch); } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; for (j = 0; j < target->req_ring_size; ++j) { struct srp_request *req = &ch->req_ring[j]; @@ -1225,8 +1227,6 @@ static int srp_rport_reconnect(struct srp_rport *rport) } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; /* * Whether or not creating a new CM ID succeeded, create a new * QP. This guarantees that all completion callback function @@ -1243,7 +1243,7 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (ret || !ch->target) + if (ret) break; ret = srp_connect_ch(ch, multich); multich = true; @@ -1261,6 +1261,8 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, { struct srp_direct_buf *desc = state->desc; + WARN_ON_ONCE(!dma_len); + desc->va = cpu_to_be64(dma_addr); desc->key = cpu_to_be32(rkey); desc->len = cpu_to_be32(dma_len); @@ -1273,31 +1275,68 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, static int srp_map_finish_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch) { + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; struct ib_pool_fmr *fmr; u64 io_addr = 0; + if (state->fmr.next >= state->fmr.end) + return -ENOMEM; + + WARN_ON_ONCE(!dev->use_fmr); + + if (state->npages == 0) + return 0; + + if (state->npages == 1 && target->global_mr) { + srp_map_desc(state, state->base_dma_addr, state->dma_len, + target->global_mr->rkey); + goto reset_state; + } + fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, state->npages, io_addr); if (IS_ERR(fmr)) return PTR_ERR(fmr); - *state->next_fmr++ = fmr; + *state->fmr.next++ = fmr; state->nmdesc++; - srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); + srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, + state->dma_len, fmr->fmr->rkey); + +reset_state: + state->npages = 0; + state->dma_len = 0; return 0; } static int srp_map_finish_fr(struct srp_map_state *state, - struct srp_rdma_ch *ch) + struct srp_rdma_ch *ch, int sg_nents) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; struct ib_send_wr *bad_wr; - struct ib_send_wr wr; + struct ib_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; + int n, err; + + if (state->fr.next >= state->fr.end) + return -ENOMEM; + + WARN_ON_ONCE(!dev->use_fast_reg); + + if (sg_nents == 0) + return 0; + + if (sg_nents == 1 && target->global_mr) { + srp_map_desc(state, sg_dma_address(state->sg), + sg_dma_len(state->sg), + target->global_mr->rkey); + return 1; + } desc = srp_fr_pool_get(ch->fr_pool); if (!desc) @@ -1306,125 +1345,54 @@ static int srp_map_finish_fr(struct srp_map_state *state, rkey = ib_inc_rkey(desc->mr->rkey); ib_update_fast_reg_key(desc->mr, rkey); - memcpy(desc->frpl->page_list, state->pages, - sizeof(state->pages[0]) * state->npages); - - memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_FAST_REG_MR; - wr.wr_id = FAST_REG_WR_ID_MASK; - wr.wr.fast_reg.iova_start = state->base_dma_addr; - wr.wr.fast_reg.page_list = desc->frpl; - wr.wr.fast_reg.page_list_len = state->npages; - wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); - wr.wr.fast_reg.length = state->dma_len; - wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - wr.wr.fast_reg.rkey = desc->mr->lkey; - - *state->next_fr++ = desc; + n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size); + if (unlikely(n < 0)) + return n; + + wr.wr.next = NULL; + wr.wr.opcode = IB_WR_REG_MR; + wr.wr.wr_id = FAST_REG_WR_ID_MASK; + wr.wr.num_sge = 0; + wr.wr.send_flags = 0; + wr.mr = desc->mr; + wr.key = desc->mr->rkey; + wr.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + + *state->fr.next++ = desc; state->nmdesc++; - srp_map_desc(state, state->base_dma_addr, state->dma_len, - desc->mr->rkey); - - return ib_post_send(ch->qp, &wr, &bad_wr); -} - -static int srp_finish_mapping(struct srp_map_state *state, - struct srp_rdma_ch *ch) -{ - struct srp_target_port *target = ch->target; - int ret = 0; - - if (state->npages == 0) - return 0; - - if (state->npages == 1 && !register_always) - srp_map_desc(state, state->base_dma_addr, state->dma_len, - target->rkey); - else - ret = target->srp_host->srp_dev->use_fast_reg ? - srp_map_finish_fr(state, ch) : - srp_map_finish_fmr(state, ch); + srp_map_desc(state, desc->mr->iova, + desc->mr->length, desc->mr->rkey); - if (ret == 0) { - state->npages = 0; - state->dma_len = 0; - } + err = ib_post_send(ch->qp, &wr.wr, &bad_wr); + if (unlikely(err)) + return err; - return ret; -} - -static void srp_map_update_start(struct srp_map_state *state, - struct scatterlist *sg, int sg_index, - dma_addr_t dma_addr) -{ - state->unmapped_sg = sg; - state->unmapped_index = sg_index; - state->unmapped_addr = dma_addr; + return n; } static int srp_map_sg_entry(struct srp_map_state *state, struct srp_rdma_ch *ch, - struct scatterlist *sg, int sg_index, - bool use_mr) + struct scatterlist *sg, int sg_index) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; struct ib_device *ibdev = dev->dev; dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); unsigned int dma_len = ib_sg_dma_len(ibdev, sg); - unsigned int len; + unsigned int len = 0; int ret; - if (!dma_len) - return 0; - - if (!use_mr) { - /* - * Once we're in direct map mode for a request, we don't - * go back to FMR or FR mode, so no need to update anything - * other than the descriptor. - */ - srp_map_desc(state, dma_addr, dma_len, target->rkey); - return 0; - } - - /* - * Since not all RDMA HW drivers support non-zero page offsets for - * FMR, if we start at an offset into a page, don't merge into the - * current FMR mapping. Finish it out, and use the kernel's MR for - * this sg entry. - */ - if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || - dma_len > dev->mr_max_size) { - ret = srp_finish_mapping(state, ch); - if (ret) - return ret; - - srp_map_desc(state, dma_addr, dma_len, target->rkey); - srp_map_update_start(state, NULL, 0, 0); - return 0; - } - - /* - * If this is the first sg that will be mapped via FMR or via FR, save - * our position. We need to know the first unmapped entry, its index, - * and the first unmapped address within that entry to be able to - * restart mapping after an error. - */ - if (!state->unmapped_sg) - srp_map_update_start(state, sg, sg_index, dma_addr); + WARN_ON_ONCE(!dma_len); while (dma_len) { unsigned offset = dma_addr & ~dev->mr_page_mask; if (state->npages == dev->max_pages_per_mr || offset != 0) { - ret = srp_finish_mapping(state, ch); + ret = srp_map_finish_fmr(state, ch); if (ret) return ret; - - srp_map_update_start(state, sg, sg_index, dma_addr); } len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); @@ -1443,78 +1411,152 @@ static int srp_map_sg_entry(struct srp_map_state *state, * boundries. */ ret = 0; - if (len != dev->mr_page_size) { - ret = srp_finish_mapping(state, ch); - if (!ret) - srp_map_update_start(state, NULL, 0, 0); - } + if (len != dev->mr_page_size) + ret = srp_map_finish_fmr(state, ch); return ret; } -static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch, - struct srp_request *req, struct scatterlist *scat, - int count) +static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) +{ + struct scatterlist *sg; + int i, ret; + + state->desc = req->indirect_desc; + state->pages = req->map_page; + state->fmr.next = req->fmr_list; + state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; + + for_each_sg(scat, sg, count, i) { + ret = srp_map_sg_entry(state, ch, sg, i); + if (ret) + return ret; + } + + ret = srp_map_finish_fmr(state, ch); + if (ret) + return ret; + + req->nmdesc = state->nmdesc; + + return 0; +} + +static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) +{ + state->desc = req->indirect_desc; + state->fr.next = req->fr_list; + state->fr.end = req->fr_list + ch->target->cmd_sg_cnt; + state->sg = scat; + + while (count) { + int i, n; + + n = srp_map_finish_fr(state, ch, count); + if (unlikely(n < 0)) + return n; + + count -= n; + for (i = 0; i < n; i++) + state->sg = sg_next(state->sg); + } + + req->nmdesc = state->nmdesc; + + return 0; +} + +static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_device *ibdev = dev->dev; struct scatterlist *sg; int i; - bool use_mr; - - state->desc = req->indirect_desc; - state->pages = req->map_page; - if (dev->use_fast_reg) { - state->next_fr = req->fr_list; - use_mr = !!ch->fr_pool; - } else { - state->next_fmr = req->fmr_list; - use_mr = !!ch->fmr_pool; - } + state->desc = req->indirect_desc; for_each_sg(scat, sg, count, i) { - if (srp_map_sg_entry(state, ch, sg, i, use_mr)) { - /* - * Memory registration failed, so backtrack to the - * first unmapped entry and continue on without using - * memory registration. - */ - dma_addr_t dma_addr; - unsigned int dma_len; - -backtrack: - sg = state->unmapped_sg; - i = state->unmapped_index; - - dma_addr = ib_sg_dma_address(ibdev, sg); - dma_len = ib_sg_dma_len(ibdev, sg); - dma_len -= (state->unmapped_addr - dma_addr); - dma_addr = state->unmapped_addr; - use_mr = false; - srp_map_desc(state, dma_addr, dma_len, target->rkey); - } + srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), + ib_sg_dma_len(dev->dev, sg), + target->global_mr->rkey); } - if (use_mr && srp_finish_mapping(state, ch)) - goto backtrack; - req->nmdesc = state->nmdesc; return 0; } +/* + * Register the indirect data buffer descriptor with the HCA. + * + * Note: since the indirect data buffer descriptor has been allocated with + * kmalloc() it is guaranteed that this buffer is a physically contiguous + * memory buffer. + */ +static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, + void **next_mr, void **end_mr, u32 idb_len, + __be32 *idb_rkey) +{ + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; + struct srp_map_state state; + struct srp_direct_buf idb_desc; + u64 idb_pages[1]; + struct scatterlist idb_sg[1]; + int ret; + + memset(&state, 0, sizeof(state)); + memset(&idb_desc, 0, sizeof(idb_desc)); + state.gen.next = next_mr; + state.gen.end = end_mr; + state.desc = &idb_desc; + state.base_dma_addr = req->indirect_dma_addr; + state.dma_len = idb_len; + + if (dev->use_fast_reg) { + state.sg = idb_sg; + sg_set_buf(idb_sg, req->indirect_desc, idb_len); + idb_sg->dma_address = req->indirect_dma_addr; /* hack! */ +#ifdef CONFIG_NEED_SG_DMA_LENGTH + idb_sg->dma_length = idb_sg->length; /* hack^2 */ +#endif + ret = srp_map_finish_fr(&state, ch, 1); + if (ret < 0) + return ret; + } else if (dev->use_fmr) { + state.pages = idb_pages; + state.pages[0] = (req->indirect_dma_addr & + dev->mr_page_mask); + state.npages = 1; + ret = srp_map_finish_fmr(&state, ch); + if (ret < 0) + return ret; + } else { + return -EINVAL; + } + + *idb_rkey = idb_desc.key; + + return 0; +} + static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; - int len, nents, count; + int len, nents, count, ret; struct srp_device *dev; struct ib_device *ibdev; struct srp_map_state state; struct srp_indirect_buf *indirect_hdr; - u32 table_len; + u32 idb_len, table_len; + __be32 idb_rkey; u8 fmt; if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) @@ -1541,7 +1583,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - if (count == 1 && !register_always) { + if (count == 1 && target->global_mr) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1551,7 +1593,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_direct_buf *buf = (void *) cmd->add_data; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); - buf->key = cpu_to_be32(target->rkey); + buf->key = cpu_to_be32(target->global_mr->rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); req->nmdesc = 0; @@ -1568,7 +1610,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); - srp_map_sg(&state, ch, req, scat, count); + if (dev->use_fast_reg) + srp_map_sg_fr(&state, ch, req, scat, count); + else if (dev->use_fmr) + srp_map_sg_fmr(&state, ch, req, scat, count); + else + srp_map_sg_dma(&state, ch, req, scat, count); /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. If this @@ -1596,6 +1643,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, count = min(state.ndesc, target->cmd_sg_cnt); table_len = state.ndesc * sizeof (struct srp_direct_buf); + idb_len = sizeof(struct srp_indirect_buf) + table_len; fmt = SRP_DATA_DESC_INDIRECT; len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); @@ -1604,8 +1652,18 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, memcpy(indirect_hdr->desc_list, req->indirect_desc, count * sizeof (struct srp_direct_buf)); + if (!target->global_mr) { + ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, + idb_len, &idb_rkey); + if (ret < 0) + return ret; + req->nmdesc++; + } else { + idb_rkey = cpu_to_be32(target->global_mr->rkey); + } + indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); - indirect_hdr->table_desc.key = cpu_to_be32(target->rkey); + indirect_hdr->table_desc.key = idb_rkey; indirect_hdr->table_desc.len = cpu_to_be32(table_len); indirect_hdr->len = cpu_to_be32(state.total_len); @@ -1837,7 +1895,7 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch, s32 delta = be32_to_cpu(req->req_lim_delta); shost_printk(KERN_ERR, target->scsi_host, PFX - "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun)); + "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) shost_printk(KERN_ERR, target->scsi_host, PFX @@ -1927,17 +1985,18 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, if (ch->connected && !target->qp_in_error) { if (wr_id & LOCAL_INV_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX - "LOCAL_INV failed with status %d\n", - wc_status); + "LOCAL_INV failed with status %s (%d)\n", + ib_wc_status_msg(wc_status), wc_status); } else if (wr_id & FAST_REG_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX - "FAST_REG_MR failed status %d\n", - wc_status); + "FAST_REG_MR failed status %s (%d)\n", + ib_wc_status_msg(wc_status), wc_status); } else { shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %d for iu %p\n", + PFX "failed %s status %s (%d) for iu %p\n", send_err ? "send" : "receive", - wc_status, (void *)(uintptr_t)wr_id); + ib_wc_status_msg(wc_status), wc_status, + (void *)(uintptr_t)wr_id); } queue_work(system_long_wq, &target->tl_err_work); } @@ -2029,7 +2088,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) memset(cmd, 0, sizeof *cmd); cmd->opcode = SRP_CMD; - cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48); + int_to_scsilun(scmnd->device->lun, &cmd->lun); cmd->tag = tag; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); @@ -2172,7 +2231,7 @@ static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) } static void srp_cm_rep_handler(struct ib_cm_id *cm_id, - struct srp_login_rsp *lrsp, + const struct srp_login_rsp *lrsp, struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; @@ -2409,8 +2468,8 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth) return scsi_change_queue_depth(sdev, qdepth); } -static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, - unsigned int lun, u8 func) +static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, + u8 func) { struct srp_target_port *target = ch->target; struct srp_rport *rport = target->rport; @@ -2444,7 +2503,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, memset(tsk_mgmt, 0, sizeof *tsk_mgmt); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64((u64) lun << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; tsk_mgmt->tsk_mgmt_func = func; tsk_mgmt->task_tag = req_tag; @@ -2558,8 +2617,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->id_ext)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2567,8 +2625,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2576,8 +2633,7 @@ static ssize_t show_service_id(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->service_id)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2746,7 +2802,6 @@ static struct scsi_host_template srp_template = { .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = srp_host_attrs, - .use_blk_tags = 1, .track_queue_depth = 1, }; @@ -2775,7 +2830,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) target->state = SRP_TARGET_SCANNING; sprintf(target->target_name, "SRP.T10:%016llX", - (unsigned long long) be64_to_cpu(target->id_ext)); + be64_to_cpu(target->id_ext)); if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device)) return -ENODEV; @@ -3149,7 +3204,7 @@ static ssize_t srp_create_target(struct device *dev, target_host->transportt = ib_srp_transport_template; target_host->max_channel = 0; target_host->max_id = 1; - target_host->max_lun = SRP_MAX_LUN; + target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target = host_to_target(target_host); @@ -3157,8 +3212,8 @@ static ssize_t srp_create_target(struct device *dev, target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; - target->lkey = host->srp_dev->mr->lkey; - target->rkey = host->srp_dev->mr->rkey; + target->lkey = host->srp_dev->pd->local_dma_lkey; + target->global_mr = host->srp_dev->global_mr; target->cmd_sg_cnt = cmd_sg_entries; target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; target->allow_ext_sg = allow_ext_sg; @@ -3177,10 +3232,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto out; - ret = scsi_init_shared_tag_map(target_host, target_host->can_queue); - if (ret) - goto out; - target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; if (!srp_conn_unique(target->srp_host, target)) { @@ -3209,7 +3260,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); - ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); + ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); if (ret) goto out; @@ -3390,7 +3441,7 @@ static void srp_add_one(struct ib_device *device) struct srp_device *srp_dev; struct ib_device_attr *dev_attr; struct srp_host *host; - int mr_page_shift, s, e, p; + int mr_page_shift, p; u64 max_pages_per_mr; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); @@ -3415,6 +3466,7 @@ static void srp_add_one(struct ib_device *device) srp_dev->use_fast_reg = (srp_dev->has_fr && (!srp_dev->has_fmr || prefer_fr)); + srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; /* * Use the smallest page size supported by the HCA, down to a @@ -3447,22 +3499,18 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->pd)) goto free_dev; - srp_dev->mr = ib_get_dma_mr(srp_dev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(srp_dev->mr)) - goto err_pd; - - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; + if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) { + srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + if (IS_ERR(srp_dev->global_mr)) + goto err_pd; } else { - s = 1; - e = device->phys_port_cnt; + srp_dev->global_mr = NULL; } - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); if (host) list_add_tail(&host->list, &srp_dev->dev_list); @@ -3482,13 +3530,13 @@ free_attr: kfree(dev_attr); } -static void srp_remove_one(struct ib_device *device) +static void srp_remove_one(struct ib_device *device, void *client_data) { struct srp_device *srp_dev; struct srp_host *host, *tmp_host; struct srp_target_port *target; - srp_dev = ib_get_client_data(device, &srp_client); + srp_dev = client_data; if (!srp_dev) return; @@ -3517,7 +3565,8 @@ static void srp_remove_one(struct ib_device *device) kfree(host); } - ib_dereg_mr(srp_dev->mr); + if (srp_dev->global_mr) + ib_dereg_mr(srp_dev->global_mr); ib_dealloc_pd(srp_dev->pd); kfree(srp_dev); diff --git a/kernel/drivers/infiniband/ulp/srp/ib_srp.h b/kernel/drivers/infiniband/ulp/srp/ib_srp.h index e690847a4..f6af531f9 100644 --- a/kernel/drivers/infiniband/ulp/srp/ib_srp.h +++ b/kernel/drivers/infiniband/ulp/srp/ib_srp.h @@ -54,7 +54,6 @@ enum { SRP_DLID_REDIRECT = 2, SRP_STALE_CONN = 3, - SRP_MAX_LUN = 512, SRP_DEF_SG_TABLESIZE = 12, SRP_DEFAULT_QUEUE_SIZE = 1 << 6, @@ -96,13 +95,14 @@ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; + struct ib_mr *global_mr; u64 mr_page_mask; int mr_page_size; int mr_max_size; int max_pages_per_mr; bool has_fmr; bool has_fr; + bool use_fmr; bool use_fast_reg; }; @@ -183,10 +183,10 @@ struct srp_target_port { spinlock_t lock; /* read only in the hot path */ + struct ib_mr *global_mr; struct srp_rdma_ch *ch; u32 ch_count; u32 lkey; - u32 rkey; enum srp_target_state state; unsigned int max_iu_len; unsigned int cmd_sg_cnt; @@ -242,7 +242,6 @@ struct srp_iu { struct srp_fr_desc { struct list_head entry; struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; }; /** @@ -277,26 +276,33 @@ struct srp_fr_pool { * @npages: Number of page addresses in the pages[] array. * @nmdesc: Number of FMR or FR memory descriptors used for mapping. * @ndesc: Number of SRP buffer descriptors that have been filled in. - * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR. - * @unmapped_index: Index of the first element mapped via FMR or FR. - * @unmapped_addr: DMA address of the first element mapped via FMR or FR. */ struct srp_map_state { union { - struct ib_pool_fmr **next_fmr; - struct srp_fr_desc **next_fr; + struct { + struct ib_pool_fmr **next; + struct ib_pool_fmr **end; + } fmr; + struct { + struct srp_fr_desc **next; + struct srp_fr_desc **end; + } fr; + struct { + void **next; + void **end; + } gen; }; struct srp_direct_buf *desc; - u64 *pages; + union { + u64 *pages; + struct scatterlist *sg; + }; dma_addr_t base_dma_addr; u32 dma_len; u32 total_len; unsigned int npages; unsigned int nmdesc; unsigned int ndesc; - struct scatterlist *unmapped_sg; - int unmapped_index; - dma_addr_t unmapped_addr; }; #endif /* IB_SRP_H */ diff --git a/kernel/drivers/infiniband/ulp/srpt/ib_srpt.c b/kernel/drivers/infiniband/ulp/srpt/ib_srpt.c index 9b84b4c0a..2e2fe818c 100644 --- a/kernel/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/kernel/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -41,12 +41,10 @@ #include <linux/string.h> #include <linux/delay.h> #include <linux/atomic.h> +#include <scsi/scsi_proto.h> #include <scsi/scsi_tcq.h> -#include <target/configfs_macros.h> #include <target/target_core_base.h> -#include <target/target_core_fabric_configfs.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "ib_srpt.h" /* Name of this kernel module. */ @@ -93,7 +91,6 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; -static const struct target_core_fabric_ops srpt_template; static void srpt_release_channel(struct srpt_rdma_ch *ch); static int srpt_queue_status(struct se_cmd *cmd); @@ -303,7 +300,7 @@ static void srpt_get_iou(struct ib_dm_mad *mad) int i; ioui = (struct ib_dm_iou_info *)mad->data; - ioui->change_id = __constant_cpu_to_be16(1); + ioui->change_id = cpu_to_be16(1); ioui->max_controllers = 16; /* set present for slot 1 and empty for the rest */ @@ -331,13 +328,13 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -349,10 +346,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); iocp->subsys_device_id = 0x0; - iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); - iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); - iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); - iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); @@ -380,13 +377,13 @@ static void srpt_get_svc_entries(u64 ioc_guid, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2 || lo > hi || hi > 1) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -437,7 +434,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, break; default: rsp_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; } } @@ -476,7 +473,8 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, mad_wc->wc->pkey_index, 0, IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA, - GFP_KERNEL); + GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(rsp)) goto err_rsp; @@ -493,11 +491,11 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, break; case IB_MGMT_METHOD_SET: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; default: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); break; } @@ -546,7 +544,8 @@ static int srpt_refresh_port(struct srpt_port *sport) sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; - ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); + ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, + NULL); if (ret) goto err_query_port; @@ -783,7 +782,7 @@ static int srpt_post_recv(struct srpt_device *sdev, list.addr = ioctx->ioctx.dma; list.length = srp_max_req_size; - list.lkey = sdev->mr->lkey; + list.lkey = sdev->pd->local_dma_lkey; wr.next = NULL; wr.sg_list = &list; @@ -818,7 +817,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch, list.addr = ioctx->ioctx.dma; list.length = len; - list.lkey = sdev->mr->lkey; + list.lkey = sdev->pd->local_dma_lkey; wr.next = NULL; wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); @@ -1206,7 +1205,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, while (rsize > 0 && tsize > 0) { sge->addr = dma_addr; - sge->lkey = ch->sport->sdev->mr->lkey; + sge->lkey = ch->sport->sdev->pd->local_dma_lkey; if (rsize >= dma_len) { sge->length = @@ -1334,12 +1333,12 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) BUG_ON(ch->sess == NULL); - target_put_sess_cmd(ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); goto out; } pr_debug("Aborting cmd with state %d and tag %lld\n", state, - ioctx->tag); + ioctx->cmd.tag); switch (state) { case SRPT_STATE_NEW: @@ -1365,11 +1364,11 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; default: WARN(1, "Unexpected command state (%d)", state); @@ -1387,7 +1386,6 @@ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) { struct srpt_send_ioctx *ioctx; enum srpt_command_state state; - struct se_cmd *cmd; u32 index; atomic_inc(&ch->sq_wr_avail); @@ -1395,7 +1393,6 @@ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) index = idx_from_wr_id(wr_id); ioctx = ch->ioctx_ring[index]; state = srpt_get_cmd_state(ioctx); - cmd = &ioctx->cmd; WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && state != SRPT_STATE_MGMT_RSP_SENT @@ -1472,10 +1469,8 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, enum srpt_opcode opcode) { - struct se_cmd *cmd; enum srpt_command_state state; - cmd = &ioctx->cmd; state = srpt_get_cmd_state(ioctx); switch (opcode) { case SRPT_RDMA_READ_LAST: @@ -1539,7 +1534,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = - __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->status = status; @@ -1589,8 +1584,8 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; - srp_rsp->req_lim_delta = __constant_cpu_to_be32(1 - + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->req_lim_delta = + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; @@ -1634,7 +1629,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) switch (len) { case 8: if ((*((__be64 *)lun) & - __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) + cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) goto out_err; break; case 4: @@ -1679,7 +1674,7 @@ static int srpt_check_stop_free(struct se_cmd *cmd) struct srpt_send_ioctx *ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); - return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + return target_put_sess_cmd(&ioctx->cmd); } /** @@ -1701,7 +1696,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, srp_cmd = recv_ioctx->ioctx.buf; cmd = &send_ioctx->cmd; - send_ioctx->tag = srp_cmd->tag; + cmd->tag = srp_cmd->tag; switch (srp_cmd->task_attr) { case SRP_CMD_SIMPLE_Q: @@ -1772,7 +1767,7 @@ static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) for (i = 0; i < ch->rq_size; ++i) { target = ch->ioctx_ring[i]; if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->tag == tag && + target->cmd.tag == tag && srpt_get_cmd_state(target) != SRPT_STATE_DONE) { ret = 0; /* now let the target core abort &target->cmd; */ @@ -1831,7 +1826,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess); srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); - send_ioctx->tag = srp_tsk->tag; + send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); if (tcm_tmr < 0) { send_ioctx->cmd.se_tmr_req->response = @@ -2080,6 +2075,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; u32 srp_sq_size = sport->port_attrib.srp_sq_size; + struct ib_cq_init_attr cq_attr = {}; int ret; WARN_ON(ch->rq_size < 1); @@ -2090,8 +2086,9 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: + cq_attr.cqe = ch->rq_size + srp_sq_size; ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, - ch->rq_size + srp_sq_size, 0); + &cq_attr); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", @@ -2176,12 +2173,9 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) */ static void __srpt_close_ch(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; enum rdma_ch_state prev_state; unsigned long flags; - sdev = ch->sport->sdev; - spin_lock_irqsave(&ch->spinlock, flags); prev_state = ch->state; switch (prev_state) { @@ -2454,8 +2448,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (it_iu_len > srp_max_req_size || it_iu_len < 64) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because its" " length (%d bytes) is out of range (%d .. %d)\n", @@ -2464,8 +2458,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (!sport->enabled) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because the target port" " has not yet been enabled\n"); @@ -2510,8 +2504,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) || *(__be64 *)(req->target_port_id + 8) != cpu_to_be64(srpt_service_guid)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); ret = -ENOMEM; pr_err("rejected SRP_LOGIN_REQ because it" " has an invalid target port identifier.\n"); @@ -2520,8 +2514,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch = kzalloc(sizeof *ch, GFP_KERNEL); if (!ch) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because no memory.\n"); ret = -ENOMEM; goto reject; @@ -2557,8 +2551,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_create_ch_ib(ch); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); goto free_ring; @@ -2566,8 +2560,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_ch_qp_rtr(ch, ch->qp); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling" " RTR failed (error code = %d)\n", ret); goto destroy_ib; @@ -2585,15 +2578,15 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (!nacl) { pr_info("Rejected login because no ACL has been" " configured yet for initiator %s.\n", ch->sess_name); - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); goto destroy_ib; } ch->sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(ch->sess)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_debug("Failed to create session\n"); goto deregister_session; } @@ -2609,8 +2602,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, rsp->max_it_iu_len = req->req_it_iu_len; rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; - rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); rsp->req_lim_delta = cpu_to_be32(ch->rq_size); atomic_set(&ch->req_lim, ch->rq_size); atomic_set(&ch->req_lim_delta, 0); @@ -2660,8 +2653,8 @@ free_ch: reject: rej->opcode = SRP_LOGIN_REJ; rej->tag = req->tag; - rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, (void *)rej, sizeof *rej); @@ -2828,7 +2821,7 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx) { - struct ib_send_wr wr; + struct ib_rdma_wr wr; struct ib_send_wr *bad_wr; struct rdma_iu *riu; int i; @@ -2856,29 +2849,29 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, for (i = 0; i < n_rdma; ++i, ++riu) { if (dir == DMA_FROM_DEVICE) { - wr.opcode = IB_WR_RDMA_WRITE; - wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + wr.wr.opcode = IB_WR_RDMA_WRITE; + wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? SRPT_RDMA_WRITE_LAST : SRPT_RDMA_MID, ioctx->ioctx.index); } else { - wr.opcode = IB_WR_RDMA_READ; - wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + wr.wr.opcode = IB_WR_RDMA_READ; + wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? SRPT_RDMA_READ_LAST : SRPT_RDMA_MID, ioctx->ioctx.index); } - wr.next = NULL; - wr.wr.rdma.remote_addr = riu->raddr; - wr.wr.rdma.rkey = riu->rkey; - wr.num_sge = riu->sge_cnt; - wr.sg_list = riu->sge; + wr.wr.next = NULL; + wr.remote_addr = riu->raddr; + wr.rkey = riu->rkey; + wr.wr.num_sge = riu->sge_cnt; + wr.wr.sg_list = riu->sge; /* only get completion event for the last rdma write */ if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) - wr.send_flags = IB_SEND_SIGNALED; + wr.wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(ch->qp, &wr, &bad_wr); + ret = ib_post_send(ch->qp, &wr.wr, &bad_wr); if (ret) break; } @@ -2887,11 +2880,11 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", __func__, __LINE__, ret, i, n_rdma); if (ret && i > 0) { - wr.num_sge = 0; - wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); - wr.send_flags = IB_SEND_SIGNALED; + wr.wr.num_sge = 0; + wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); + wr.wr.send_flags = IB_SEND_SIGNALED; while (ch->state == CH_LIVE && - ib_post_send(ch->qp, &wr, &bad_wr) != 0) { + ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) { pr_info("Trying to abort failed RDMA transfer [%d]\n", ioctx->ioctx.index); msleep(1000); @@ -2979,7 +2972,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) case CH_DRAINING: case CH_RELEASING: pr_debug("cmd with tag %lld: channel disconnecting\n", - ioctx->tag); + ioctx->cmd.tag); srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); ret = -EINVAL; goto out; @@ -3054,27 +3047,27 @@ static void srpt_queue_response(struct se_cmd *cmd) ret = srpt_xfer_data(ch, ioctx); if (ret) { pr_err("xfer_data failed for tag %llu\n", - ioctx->tag); + ioctx->cmd.tag); return; } } if (state != SRPT_STATE_MGMT) - resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag, + resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->cmd.tag, cmd->scsi_status); else { srp_tm_status = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response); resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, - ioctx->tag); + ioctx->cmd.tag); } ret = srpt_post_send(ch, ioctx, resp_len); if (ret) { pr_err("sending cmd response failed for tag %llu\n", - ioctx->tag); + ioctx->cmd.tag); srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); } } @@ -3217,10 +3210,6 @@ static void srpt_add_one(struct ib_device *device) if (IS_ERR(sdev->pd)) goto free_dev; - sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(sdev->mr)) - goto err_pd; - sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr); srq_attr.event_handler = srpt_srq_event; @@ -3232,7 +3221,7 @@ static void srpt_add_one(struct ib_device *device) sdev->srq = ib_create_srq(sdev->pd, &srq_attr); if (IS_ERR(sdev->srq)) - goto err_mr; + goto err_pd; pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr, @@ -3256,7 +3245,7 @@ static void srpt_add_one(struct ib_device *device) * in the system as service_id; therefore, the target_id will change * if this HCA is gone bad and replaced by different HCA */ - if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL)) + if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0)) goto err_cm; INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, @@ -3317,8 +3306,6 @@ err_cm: ib_destroy_cm_id(sdev->cm_id); err_srq: ib_destroy_srq(sdev->srq); -err_mr: - ib_dereg_mr(sdev->mr); err_pd: ib_dealloc_pd(sdev->pd); free_dev: @@ -3332,12 +3319,11 @@ err: /** * srpt_remove_one() - InfiniBand device removal callback function. */ -static void srpt_remove_one(struct ib_device *device) +static void srpt_remove_one(struct ib_device *device, void *client_data) { - struct srpt_device *sdev; + struct srpt_device *sdev = client_data; int i; - sdev = ib_get_client_data(device, &srpt_client); if (!sdev) { pr_info("%s(%s): nothing to do.\n", __func__, device->name); return; @@ -3364,7 +3350,6 @@ static void srpt_remove_one(struct ib_device *device) srpt_release_sdev(sdev); ib_destroy_srq(sdev->srq); - ib_dereg_mr(sdev->mr); ib_dealloc_pd(sdev->pd); srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, @@ -3394,11 +3379,6 @@ static char *srpt_get_fabric_name(void) return "srpt"; } -static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - return SCSI_TRANSPORTID_PROTOCOLID_SRP; -} - static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) { struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); @@ -3411,69 +3391,6 @@ static u16 srpt_get_tag(struct se_portal_group *tpg) return 1; } -static u32 srpt_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, unsigned char *buf) -{ - struct srpt_node_acl *nacl; - struct spc_rdma_transport_id *tr_id; - - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - tr_id = (void *)buf; - tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP; - memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id)); - return sizeof(*tr_id); -} - -static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - return sizeof(struct spc_rdma_transport_id); -} - -static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct spc_rdma_transport_id *tr_id; - - *port_nexus_ptr = NULL; - *out_tid_len = sizeof(struct spc_rdma_transport_id); - tr_id = (void *)buf; - return (char *)tr_id->i_port_id; -} - -static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct srpt_node_acl *nacl; - - nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct srpt_node_acl\n"); - return NULL; - } - - return &nacl->nacl; -} - -static void srpt_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct srpt_node_acl *nacl; - - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - kfree(nacl); -} - static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -3547,14 +3464,6 @@ static void srpt_set_default_node_attrs(struct se_node_acl *nacl) { } -static u32 srpt_get_task_tag(struct se_cmd *se_cmd) -{ - struct srpt_send_ioctx *ioctx; - - ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); - return ioctx->tag; -} - /* Note: only used from inside debug printk's by the TCM core. */ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd) { @@ -3597,40 +3506,19 @@ out: * configfs callback function invoked for * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id */ -static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, - struct config_group *group, - const char *name) +static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); - struct se_node_acl *se_nacl, *se_nacl_new; - struct srpt_node_acl *nacl; - int ret = 0; - u32 nexus_depth = 1; + struct srpt_port *sport = + container_of(se_nacl->se_tpg, struct srpt_port, port_tpg_1); + struct srpt_node_acl *nacl = + container_of(se_nacl, struct srpt_node_acl, nacl); u8 i_port_id[16]; if (srpt_parse_i_port_id(i_port_id, name) < 0) { pr_err("invalid initiator port ID %s\n", name); - ret = -EINVAL; - goto err; + return -EINVAL; } - se_nacl_new = srpt_alloc_fabric_acl(tpg); - if (!se_nacl_new) { - ret = -ENOMEM; - goto err; - } - /* - * nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a node ACL from demo mode to explict - */ - se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name, - nexus_depth); - if (IS_ERR(se_nacl)) { - ret = PTR_ERR(se_nacl); - goto err; - } - /* Locate our struct srpt_node_acl and set sdev and i_port_id. */ - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); memcpy(&nacl->i_port_id[0], &i_port_id[0], 16); nacl->sport = sport; @@ -3638,45 +3526,37 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, list_add_tail(&nacl->list, &sport->port_acl_list); spin_unlock_irq(&sport->port_acl_lock); - return se_nacl; -err: - return ERR_PTR(ret); + return 0; } /* * configfs callback function invoked for * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id */ -static void srpt_drop_nodeacl(struct se_node_acl *se_nacl) +static void srpt_cleanup_nodeacl(struct se_node_acl *se_nacl) { - struct srpt_node_acl *nacl; - struct srpt_device *sdev; - struct srpt_port *sport; + struct srpt_node_acl *nacl = + container_of(se_nacl, struct srpt_node_acl, nacl); + struct srpt_port *sport = nacl->sport; - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - sport = nacl->sport; - sdev = sport->sdev; spin_lock_irq(&sport->port_acl_lock); list_del(&nacl->list); spin_unlock_irq(&sport->port_acl_lock); - core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1); - srpt_release_fabric_acl(NULL, se_nacl); } -static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size( - struct se_portal_group *se_tpg, - char *page) +static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item, + char *page) { + struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); } -static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size( - struct se_portal_group *se_tpg, - const char *page, - size_t count) +static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, + const char *page, size_t count) { + struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); unsigned long val; int ret; @@ -3701,22 +3581,19 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size( return count; } -TF_TPG_ATTRIB_ATTR(srpt, srp_max_rdma_size, S_IRUGO | S_IWUSR); - -static ssize_t srpt_tpg_attrib_show_srp_max_rsp_size( - struct se_portal_group *se_tpg, - char *page) +static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item, + char *page) { + struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); } -static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size( - struct se_portal_group *se_tpg, - const char *page, - size_t count) +static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, + const char *page, size_t count) { + struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); unsigned long val; int ret; @@ -3741,22 +3618,19 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size( return count; } -TF_TPG_ATTRIB_ATTR(srpt, srp_max_rsp_size, S_IRUGO | S_IWUSR); - -static ssize_t srpt_tpg_attrib_show_srp_sq_size( - struct se_portal_group *se_tpg, - char *page) +static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item, + char *page) { + struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); } -static ssize_t srpt_tpg_attrib_store_srp_sq_size( - struct se_portal_group *se_tpg, - const char *page, - size_t count) +static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, + const char *page, size_t count) { + struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); unsigned long val; int ret; @@ -3781,29 +3655,29 @@ static ssize_t srpt_tpg_attrib_store_srp_sq_size( return count; } -TF_TPG_ATTRIB_ATTR(srpt, srp_sq_size, S_IRUGO | S_IWUSR); +CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rsp_size); +CONFIGFS_ATTR(srpt_tpg_attrib_, srp_sq_size); static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { - &srpt_tpg_attrib_srp_max_rdma_size.attr, - &srpt_tpg_attrib_srp_max_rsp_size.attr, - &srpt_tpg_attrib_srp_sq_size.attr, + &srpt_tpg_attrib_attr_srp_max_rdma_size, + &srpt_tpg_attrib_attr_srp_max_rsp_size, + &srpt_tpg_attrib_attr_srp_sq_size, NULL, }; -static ssize_t srpt_tpg_show_enable( - struct se_portal_group *se_tpg, - char *page) +static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) { + struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0); } -static ssize_t srpt_tpg_store_enable( - struct se_portal_group *se_tpg, - const char *page, - size_t count) +static ssize_t srpt_tpg_enable_store(struct config_item *item, + const char *page, size_t count) { + struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); unsigned long tmp; int ret; @@ -3826,10 +3700,10 @@ static ssize_t srpt_tpg_store_enable( return count; } -TF_TPG_BASE_ATTR(srpt, enable, S_IRUGO | S_IWUSR); +CONFIGFS_ATTR(srpt_tpg_, enable); static struct configfs_attribute *srpt_tpg_attrs[] = { - &srpt_tpg_enable.attr, + &srpt_tpg_attr_enable, NULL, }; @@ -3845,8 +3719,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, int res; /* Initialize sport->port_wwn and sport->port_tpg_1 */ - res = core_tpg_register(&srpt_template, &sport->port_wwn, - &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL); + res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP); if (res) return ERR_PTR(res); @@ -3900,36 +3773,29 @@ static void srpt_drop_tport(struct se_wwn *wwn) pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item)); } -static ssize_t srpt_wwn_show_attr_version(struct target_fabric_configfs *tf, - char *buf) +static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); } -TF_WWN_ATTR_RO(srpt, version); +CONFIGFS_ATTR_RO(srpt_wwn_, version); static struct configfs_attribute *srpt_wwn_attrs[] = { - &srpt_wwn_version.attr, + &srpt_wwn_attr_version, NULL, }; static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, .name = "srpt", + .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, - .get_fabric_proto_ident = srpt_get_fabric_proto_ident, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, - .tpg_get_default_depth = srpt_get_default_depth, - .tpg_get_pr_transport_id = srpt_get_pr_transport_id, - .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id, .tpg_check_demo_mode = srpt_check_false, .tpg_check_demo_mode_cache = srpt_check_true, .tpg_check_demo_mode_write_protect = srpt_check_true, .tpg_check_prod_mode_write_protect = srpt_check_false, - .tpg_alloc_fabric_acl = srpt_alloc_fabric_acl, - .tpg_release_fabric_acl = srpt_release_fabric_acl, .tpg_get_inst_index = srpt_tpg_get_inst_index, .release_cmd = srpt_release_cmd, .check_stop_free = srpt_check_stop_free, @@ -3940,7 +3806,6 @@ static const struct target_core_fabric_ops srpt_template = { .write_pending = srpt_write_pending, .write_pending_status = srpt_write_pending_status, .set_default_node_attributes = srpt_set_default_node_attrs, - .get_task_tag = srpt_get_task_tag, .get_cmd_state = srpt_get_tcm_cmd_state, .queue_data_in = srpt_queue_data_in, .queue_status = srpt_queue_status, @@ -3954,12 +3819,8 @@ static const struct target_core_fabric_ops srpt_template = { .fabric_drop_wwn = srpt_drop_tport, .fabric_make_tpg = srpt_make_tpg, .fabric_drop_tpg = srpt_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = srpt_make_nodeacl, - .fabric_drop_nodeacl = srpt_drop_nodeacl, + .fabric_init_nodeacl = srpt_init_nodeacl, + .fabric_cleanup_nodeacl = srpt_cleanup_nodeacl, .tfc_wwn_attrs = srpt_wwn_attrs, .tfc_tpg_base_attrs = srpt_tpg_attrs, diff --git a/kernel/drivers/infiniband/ulp/srpt/ib_srpt.h b/kernel/drivers/infiniband/ulp/srpt/ib_srpt.h index 3dae15690..5faad8acd 100644 --- a/kernel/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/kernel/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -238,14 +238,13 @@ struct srpt_send_ioctx { bool rdma_aborted; struct se_cmd cmd; struct completion tx_done; - u64 tag; int sg_cnt; int mapped_sg_count; u16 n_rdma_ius; u8 n_rdma; u8 n_rbuf; bool queue_status_only; - u8 sense_data[SCSI_SENSE_BUFFERSIZE]; + u8 sense_data[TRANSPORT_SENSE_BUFFER]; }; /** @@ -394,7 +393,6 @@ struct srpt_port { struct srpt_device { struct ib_device *device; struct ib_pd *pd; - struct ib_mr *mr; struct ib_srq *srq; struct ib_cm_id *cm_id; struct ib_device_attr dev_attr; @@ -410,34 +408,16 @@ struct srpt_device { /** * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). + * @nacl: Target core node ACL information. * @i_port_id: 128-bit SRP initiator port ID. * @sport: port information. - * @nacl: Target core node ACL information. * @list: Element of the per-HCA ACL list. */ struct srpt_node_acl { + struct se_node_acl nacl; u8 i_port_id[16]; struct srpt_port *sport; - struct se_node_acl nacl; struct list_head list; }; -/* - * SRP-releated SCSI persistent reservation definitions. - * - * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction). - * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using - * SCSI over an RDMA interface). - */ - -enum { - SCSI_TRANSPORTID_PROTOCOLID_SRP = 4, -}; - -struct spc_rdma_transport_id { - uint8_t protocol_identifier; - uint8_t reserved[7]; - uint8_t i_port_id[16]; -}; - #endif /* IB_SRPT_H */ |