summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/infiniband/hw/mlx4/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/infiniband/hw/mlx4/main.c')
-rw-r--r--kernel/drivers/infiniband/hw/mlx4/main.c1166
1 files changed, 597 insertions, 569 deletions
diff --git a/kernel/drivers/infiniband/hw/mlx4/main.c b/kernel/drivers/infiniband/hw/mlx4/main.c
index cc64400d4..97d6878f9 100644
--- a/kernel/drivers/infiniband/hw/mlx4/main.c
+++ b/kernel/drivers/infiniband/hw/mlx4/main.c
@@ -45,6 +45,9 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include <net/bonding.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
@@ -74,13 +77,6 @@ static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
-struct update_gid_work {
- struct work_struct work;
- union ib_gid gids[128];
- struct mlx4_ib_dev *dev;
- int port;
-};
-
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static struct workqueue_struct *wq;
@@ -93,8 +89,6 @@ static void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}
-static union ib_gid zgid;
-
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
@@ -131,15 +125,267 @@ static int num_ib_ports(struct mlx4_dev *dev)
return ib_ports;
}
+static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
+
+ if (dev) {
+ if (mlx4_is_bonded(ibdev->dev)) {
+ struct net_device *upper = NULL;
+
+ upper = netdev_master_upper_dev_get_rcu(dev);
+ if (upper) {
+ struct net_device *active;
+
+ active = bond_option_active_slave_get_rcu(netdev_priv(upper));
+ if (active)
+ dev = active;
+ }
+ }
+ }
+ if (dev)
+ dev_hold(dev);
+
+ rcu_read_unlock();
+ return dev;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ union ib_gid *gid_tbl;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ gid_tbl = mailbox->buf;
+
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
+ memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | port_num,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (mlx4_is_bonded(dev))
+ err += mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_add_gid(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ int free = -1, found = -1;
+ int ret = 0;
+ int hw_update = 0;
+ int i;
+ struct gid_entry *gids = NULL;
+
+ if (!rdma_cap_roce_gid_table(device, port_num))
+ return -EINVAL;
+
+ if (port_num > MLX4_MAX_PORTS)
+ return -EINVAL;
+
+ if (!context)
+ return -EINVAL;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_bh(&iboe->lock);
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ found = i;
+ break;
+ }
+ if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
+ free = i; /* HW has space */
+ }
+
+ if (found < 0) {
+ if (free < 0) {
+ ret = -ENOSPC;
+ } else {
+ port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
+ if (!port_gid_table->gids[free].ctx) {
+ ret = -ENOMEM;
+ } else {
+ *context = port_gid_table->gids[free].ctx;
+ memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].ctx->real_index = free;
+ port_gid_table->gids[free].ctx->refcount = 1;
+ hw_update = 1;
+ }
+ }
+ } else {
+ struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
+ *context = ctx;
+ ctx->refcount++;
+ }
+ if (!ret && hw_update) {
+ gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+ if (!gids) {
+ ret = -ENOMEM;
+ } else {
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ }
+ }
+ spin_unlock_bh(&iboe->lock);
+
+ if (!ret && hw_update) {
+ ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ kfree(gids);
+ }
+
+ return ret;
+}
+
+static int mlx4_ib_del_gid(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ void **context)
+{
+ struct gid_cache_context *ctx = *context;
+ struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ int ret = 0;
+ int hw_update = 0;
+ struct gid_entry *gids = NULL;
+
+ if (!rdma_cap_roce_gid_table(device, port_num))
+ return -EINVAL;
+
+ if (port_num > MLX4_MAX_PORTS)
+ return -EINVAL;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_bh(&iboe->lock);
+ if (ctx) {
+ ctx->refcount--;
+ if (!ctx->refcount) {
+ unsigned int real_index = ctx->real_index;
+
+ memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
+ kfree(port_gid_table->gids[real_index].ctx);
+ port_gid_table->gids[real_index].ctx = NULL;
+ hw_update = 1;
+ }
+ }
+ if (!ret && hw_update) {
+ int i;
+
+ gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+ if (!gids) {
+ ret = -ENOMEM;
+ } else {
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ }
+ }
+ spin_unlock_bh(&iboe->lock);
+
+ if (!ret && hw_update) {
+ ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ kfree(gids);
+ }
+ return ret;
+}
+
+int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
+ u8 port_num, int index)
+{
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct gid_cache_context *ctx = NULL;
+ union ib_gid gid;
+ struct mlx4_port_gid_table *port_gid_table;
+ int real_index = -EINVAL;
+ int i;
+ int ret;
+ unsigned long flags;
+
+ if (port_num > MLX4_MAX_PORTS)
+ return -EINVAL;
+
+ if (mlx4_is_bonded(ibdev->dev))
+ port_num = 1;
+
+ if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
+ return index;
+
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+ if (ret)
+ return ret;
+
+ if (!memcmp(&gid, &zgid, sizeof(gid)))
+ return -EINVAL;
+
+ spin_lock_irqsave(&iboe->lock, flags);
+ port_gid_table = &iboe->gids[port_num - 1];
+
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ ctx = port_gid_table->gids[i].ctx;
+ break;
+ }
+ if (ctx)
+ real_index = ctx->real_index;
+ spin_unlock_irqrestore(&iboe->lock, flags);
+ return real_index;
+}
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
- struct ib_device_attr *props)
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
int have_ib_ports;
+ struct mlx4_uverbs_ex_query_device cmd;
+ struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
+ struct mlx4_clock_params clock_params;
+
+ if (uhw->inlen) {
+ if (uhw->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
+ if (err)
+ return err;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+ if (cmd.reserved)
+ return -EINVAL;
+ }
+
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
@@ -196,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->persist->pdev->device;
@@ -208,6 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
+ props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
@@ -229,7 +478,25 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
+ props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
+ props->timestamp_mask = 0xFFFFFFFFFFFFULL;
+ if (!mlx4_is_slave(dev->dev))
+ err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
+
+ if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
+ resp.response_length += sizeof(resp.hca_core_clock_offset);
+ if (!err && !mlx4_is_slave(dev->dev)) {
+ resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
+ resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
+ }
+ }
+
+ if (uhw->outlen) {
+ err = ib_copy_to_udata(uhw, &resp, resp.response_length);
+ if (err)
+ goto out;
+ }
out:
kfree(in_mad);
kfree(out_mad);
@@ -375,12 +642,13 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
- if (is_bonded)
- rtnl_lock(); /* required to get upper dev */
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
- if (ndev && is_bonded)
- ndev = netdev_master_upper_dev_get(ndev);
+ if (ndev && is_bonded) {
+ rcu_read_lock(); /* required to get upper dev */
+ ndev = netdev_master_upper_dev_get_rcu(ndev);
+ rcu_read_unlock();
+ }
if (!ndev)
goto out_unlock;
@@ -392,8 +660,6 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_bh(&iboe->lock);
- if (is_bonded)
- rtnl_unlock();
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
@@ -476,23 +742,27 @@ out:
return err;
}
-static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibdev);
-
- *gid = dev->iboe.gid_table[port - 1][index];
-
- return 0;
-}
-
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
- if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ int ret;
+
+ if (rdma_protocol_ib(ibdev, port))
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
- else
- return iboe_query_gid(ibdev, port, index, gid);
+
+ if (!rdma_protocol_roce(ibdev, port))
+ return -ENODEV;
+
+ if (!rdma_cap_roce_gid_table(ibdev, port))
+ return -ENODEV;
+
+ ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
+ if (ret == -EAGAIN) {
+ memcpy(gid, &zgid, sizeof(*gid));
+ return 0;
+ }
+
+ return ret;
}
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -653,7 +923,7 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
resp.cqe_size = dev->dev->caps.cqe_size;
}
- context = kmalloc(sizeof *context, GFP_KERNEL);
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -690,21 +960,143 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
+static void mlx4_ib_vma_open(struct vm_area_struct *area)
+{
+ /* vma_open is called when a new VMA is created on top of our VMA.
+ * This is done through either mremap flow or split_vma (usually due
+ * to mlock, madvise, munmap, etc.). We do not support a clone of the
+ * vma, as this VMA is strongly hardware related. Therefore we set the
+ * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
+ * calling us again and trying to do incorrect actions. We assume that
+ * the original vma size is exactly a single page that there will be no
+ * "splitting" operations on.
+ */
+ area->vm_ops = NULL;
+}
+
+static void mlx4_ib_vma_close(struct vm_area_struct *area)
+{
+ struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
+
+ /* It's guaranteed that all VMAs opened on a FD are closed before the
+ * file itself is closed, therefore no sync is needed with the regular
+ * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
+ * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
+ * The close operation is usually called under mm->mmap_sem except when
+ * process is exiting. The exiting case is handled explicitly as part
+ * of mlx4_ib_disassociate_ucontext.
+ */
+ mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
+ area->vm_private_data;
+
+ /* set the vma context pointer to null in the mlx4_ib driver's private
+ * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
+ */
+ mlx4_ib_vma_priv_data->vma = NULL;
+}
+
+static const struct vm_operations_struct mlx4_ib_vm_ops = {
+ .open = mlx4_ib_vma_open,
+ .close = mlx4_ib_vma_close
+};
+
+static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ int i;
+ int ret = 0;
+ struct vm_area_struct *vma;
+ struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ /* make sure that task is dead before returning, it may
+ * prevent a rare case of module down in parallel to a
+ * call to mlx4_ib_vma_close.
+ */
+ put_task_struct(owning_process);
+ msleep(1);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the task struct */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
+
+ /* need to protect from a race on closing the vma as part of
+ * mlx4_ib_vma_close().
+ */
+ down_read(&owning_mm->mmap_sem);
+ for (i = 0; i < HW_BAR_COUNT; i++) {
+ vma = context->hw_bar_info[i].vma;
+ if (!vma)
+ continue;
+
+ ret = zap_vma_ptes(context->hw_bar_info[i].vma,
+ context->hw_bar_info[i].vma->vm_start,
+ PAGE_SIZE);
+ if (ret) {
+ pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
+ BUG_ON(1);
+ }
+
+ /* context going to be destroyed, should not access ops any more */
+ context->hw_bar_info[i].vma->vm_ops = NULL;
+ }
+
+ up_read(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
+}
+
+static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
+ struct mlx4_ib_vma_private_data *vma_private_data)
+{
+ vma_private_data->vma = vma;
+ vma->vm_private_data = vma_private_data;
+ vma->vm_ops = &mlx4_ib_vm_ops;
+}
+
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
+ struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
if (vma->vm_pgoff == 0) {
+ /* We prevent double mmaping on same context */
+ if (mucontext->hw_bar_info[HW_BAR_DB].vma)
+ return -EINVAL;
+
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
+
+ mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
+
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+ /* We prevent double mmaping on same context */
+ if (mucontext->hw_bar_info[HW_BAR_BF].vma)
+ return -EINVAL;
+
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
@@ -712,8 +1104,36 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
- } else
+
+ mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
+
+ } else if (vma->vm_pgoff == 3) {
+ struct mlx4_clock_params params;
+ int ret;
+
+ /* We prevent double mmaping on same context */
+ if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
+ return -EINVAL;
+
+ ret = mlx4_get_internal_clock_params(dev->dev, &params);
+
+ if (ret)
+ return ret;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ (pci_resource_start(dev->dev->persist->pdev,
+ params.bar) +
+ params.offset)
+ >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx4_ib_set_vma_data(vma,
+ &mucontext->hw_bar_info[HW_BAR_CLOCK]);
+ } else {
return -EINVAL;
+ }
return 0;
}
@@ -758,6 +1178,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx4_ib_xrcd *xrcd;
+ struct ib_cq_init_attr cq_attr = {};
int err;
if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
@@ -777,7 +1198,8 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
goto err2;
}
- xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+ cq_attr.cqe = 1;
+ xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
@@ -827,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
return 0;
}
+static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
+ struct mlx4_ib_counters *ctr_table)
+{
+ struct counter_index *counter, *tmp_count;
+
+ mutex_lock(&ctr_table->mutex);
+ list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
+ list) {
+ if (counter->allocated)
+ mlx4_counter_free(ibdev->dev, counter->index);
+ list_del(&counter->list);
+ kfree(counter);
+ }
+ mutex_unlock(&ctr_table->mutex);
+}
+
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
@@ -1090,7 +1528,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
- MLX4_CMD_NATIVE);
+ MLX4_CMD_WRAPPED);
if (ret == -ENOMEM)
pr_err("mcg table is full. Fail to register network rule.\n");
else if (ret == -ENXIO)
@@ -1107,7 +1545,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
int err;
err = mlx4_cmd(dev, reg_id, 0, 0,
MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
- MLX4_CMD_NATIVE);
+ MLX4_CMD_WRAPPED);
if (err)
pr_err("Fail to detach network rule. registration id = 0x%llx\n",
reg_id);
@@ -1185,7 +1623,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
&mflow->reg_id[i].id);
if (err)
goto err_create_flow;
- i++;
if (is_bonded) {
/* Application always sees one port so the mirror rule
* must be on port #2
@@ -1200,6 +1637,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
j++;
}
+ i++;
}
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
@@ -1207,7 +1645,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
&mflow->reg_id[i].id);
if (err)
goto err_create_flow;
- i++;
+
if (is_bonded) {
flow_attr->port = 2;
err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
@@ -1218,6 +1656,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
j++;
}
/* function to create mirror rule */
+ i++;
}
return &mflow->ibflow;
@@ -1489,272 +1928,6 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
- struct net_device *dev)
-{
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
- if (vlan_id < 0x1000) {
- eui[3] = vlan_id >> 8;
- eui[4] = vlan_id & 0xff;
- } else {
- eui[3] = 0xff;
- eui[4] = 0xfe;
- }
- eui[0] ^= 2;
-}
-
-static void update_gids_task(struct work_struct *work)
-{
- struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
- struct mlx4_cmd_mailbox *mailbox;
- union ib_gid *gids;
- int err;
- struct mlx4_dev *dev = gw->dev->dev;
- int is_bonded = mlx4_is_bonded(dev);
-
- if (!gw->dev->ib_active)
- return;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox)) {
- pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
- return;
- }
-
- gids = mailbox->buf;
- memcpy(gids, gw->gids, sizeof gw->gids);
-
- err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
- if (err)
- pr_warn("set port command failed\n");
- else
- if ((gw->port == 1) || !is_bonded)
- mlx4_ib_dispatch_event(gw->dev,
- is_bonded ? 1 : gw->port,
- IB_EVENT_GID_CHANGE);
-
- mlx4_free_cmd_mailbox(dev, mailbox);
- kfree(gw);
-}
-
-static void reset_gids_task(struct work_struct *work)
-{
- struct update_gid_work *gw =
- container_of(work, struct update_gid_work, work);
- struct mlx4_cmd_mailbox *mailbox;
- union ib_gid *gids;
- int err;
- struct mlx4_dev *dev = gw->dev->dev;
-
- if (!gw->dev->ib_active)
- return;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox)) {
- pr_warn("reset gid table failed\n");
- goto free;
- }
-
- gids = mailbox->buf;
- memcpy(gids, gw->gids, sizeof(gw->gids));
-
- if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
- IB_LINK_LAYER_ETHERNET) {
- err = mlx4_cmd(dev, mailbox->dma,
- MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B,
- MLX4_CMD_WRAPPED);
- if (err)
- pr_warn("set port %d command failed\n", gw->port);
- }
-
- mlx4_free_cmd_mailbox(dev, mailbox);
-free:
- kfree(gw);
-}
-
-static int update_gid_table(struct mlx4_ib_dev *dev, int port,
- union ib_gid *gid, int clear,
- int default_gid)
-{
- struct update_gid_work *work;
- int i;
- int need_update = 0;
- int free = -1;
- int found = -1;
- int max_gids;
-
- if (default_gid) {
- free = 0;
- } else {
- max_gids = dev->dev->caps.gid_table_len[port];
- for (i = 1; i < max_gids; ++i) {
- if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
- sizeof(*gid)))
- found = i;
-
- if (clear) {
- if (found >= 0) {
- need_update = 1;
- dev->iboe.gid_table[port - 1][found] =
- zgid;
- break;
- }
- } else {
- if (found >= 0)
- break;
-
- if (free < 0 &&
- !memcmp(&dev->iboe.gid_table[port - 1][i],
- &zgid, sizeof(*gid)))
- free = i;
- }
- }
- }
-
- if (found == -1 && !clear && free >= 0) {
- dev->iboe.gid_table[port - 1][free] = *gid;
- need_update = 1;
- }
-
- if (!need_update)
- return 0;
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
- INIT_WORK(&work->work, update_gids_task);
- work->port = port;
- work->dev = dev;
- queue_work(wq, &work->work);
-
- return 0;
-}
-
-static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
-{
- gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-
-static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
-{
- struct update_gid_work *work;
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
- memset(work->gids, 0, sizeof(work->gids));
- INIT_WORK(&work->work, reset_gids_task);
- work->dev = dev;
- work->port = port;
- queue_work(wq, &work->work);
- return 0;
-}
-
-static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
- struct mlx4_ib_dev *ibdev, union ib_gid *gid)
-{
- struct mlx4_ib_iboe *iboe;
- int port = 0;
- struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
- rdma_vlan_dev_real_dev(event_netdev) :
- event_netdev;
- union ib_gid default_gid;
-
- mlx4_make_default_gid(real_dev, &default_gid);
-
- if (!memcmp(gid, &default_gid, sizeof(*gid)))
- return 0;
-
- if (event != NETDEV_DOWN && event != NETDEV_UP)
- return 0;
-
- if ((real_dev != event_netdev) &&
- (event == NETDEV_DOWN) &&
- rdma_link_local_addr((struct in6_addr *)gid))
- return 0;
-
- iboe = &ibdev->iboe;
- spin_lock_bh(&iboe->lock);
-
- for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
- if ((netif_is_bond_master(real_dev) &&
- (real_dev == iboe->masters[port - 1])) ||
- (!netif_is_bond_master(real_dev) &&
- (real_dev == iboe->netdevs[port - 1])))
- update_gid_table(ibdev, port, gid,
- event == NETDEV_DOWN, 0);
-
- spin_unlock_bh(&iboe->lock);
- return 0;
-
-}
-
-static u8 mlx4_ib_get_dev_port(struct net_device *dev,
- struct mlx4_ib_dev *ibdev)
-{
- u8 port = 0;
- struct mlx4_ib_iboe *iboe;
- struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
- rdma_vlan_dev_real_dev(dev) : dev;
-
- iboe = &ibdev->iboe;
-
- for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
- if ((netif_is_bond_master(real_dev) &&
- (real_dev == iboe->masters[port - 1])) ||
- (!netif_is_bond_master(real_dev) &&
- (real_dev == iboe->netdevs[port - 1])))
- break;
-
- if ((port == 0) || (port > ibdev->dev->caps.num_ports))
- return 0;
- else
- return port;
-}
-
-static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct mlx4_ib_dev *ibdev;
- struct in_ifaddr *ifa = ptr;
- union ib_gid gid;
- struct net_device *event_netdev = ifa->ifa_dev->dev;
-
- ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
-
- mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
- return NOTIFY_DONE;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct mlx4_ib_dev *ibdev;
- struct inet6_ifaddr *ifa = ptr;
- union ib_gid *gid = (union ib_gid *)&ifa->addr;
- struct net_device *event_netdev = ifa->idev->dev;
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
-
- mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
- return NOTIFY_DONE;
-}
-#endif
-
#define MLX4_IB_INVALID_MAC ((u64)-1)
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
@@ -1813,94 +1986,6 @@ unlock:
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
-static void mlx4_ib_get_dev_addr(struct net_device *dev,
- struct mlx4_ib_dev *ibdev, u8 port)
-{
- struct in_device *in_dev;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_dev *in6_dev;
- union ib_gid *pgid;
- struct inet6_ifaddr *ifp;
- union ib_gid default_gid;
-#endif
- union ib_gid gid;
-
-
- if ((port == 0) || (port > ibdev->dev->caps.num_ports))
- return;
-
- /* IPv4 gids */
- in_dev = in_dev_get(dev);
- if (in_dev) {
- for_ifa(in_dev) {
- /*ifa->ifa_address;*/
- ipv6_addr_set_v4mapped(ifa->ifa_address,
- (struct in6_addr *)&gid);
- update_gid_table(ibdev, port, &gid, 0, 0);
- }
- endfor_ifa(in_dev);
- in_dev_put(in_dev);
- }
-#if IS_ENABLED(CONFIG_IPV6)
- mlx4_make_default_gid(dev, &default_gid);
- /* IPv6 gids */
- in6_dev = in6_dev_get(dev);
- if (in6_dev) {
- read_lock_bh(&in6_dev->lock);
- list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
- pgid = (union ib_gid *)&ifp->addr;
- if (!memcmp(pgid, &default_gid, sizeof(*pgid)))
- continue;
- update_gid_table(ibdev, port, pgid, 0, 0);
- }
- read_unlock_bh(&in6_dev->lock);
- in6_dev_put(in6_dev);
- }
-#endif
-}
-
-static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
- struct net_device *dev, u8 port)
-{
- union ib_gid gid;
- mlx4_make_default_gid(dev, &gid);
- update_gid_table(ibdev, port, &gid, 0, 1);
-}
-
-static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
-{
- struct net_device *dev;
- struct mlx4_ib_iboe *iboe = &ibdev->iboe;
- int i;
- int err = 0;
-
- for (i = 1; i <= ibdev->num_ports; ++i) {
- if (rdma_port_get_link_layer(&ibdev->ib_dev, i) ==
- IB_LINK_LAYER_ETHERNET) {
- err = reset_gid_table(ibdev, i);
- if (err)
- goto out;
- }
- }
-
- read_lock(&dev_base_lock);
- spin_lock_bh(&iboe->lock);
-
- for_each_netdev(&init_net, dev) {
- u8 port = mlx4_ib_get_dev_port(dev, ibdev);
- /* port will be non-zero only for ETH ports */
- if (port) {
- mlx4_ib_set_default_gid(ibdev, dev, port);
- mlx4_ib_get_dev_addr(dev, ibdev, port);
- }
- }
-
- spin_unlock_bh(&iboe->lock);
- read_unlock(&dev_base_lock);
-out:
- return err;
-}
-
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
unsigned long event)
@@ -1910,81 +1995,22 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
int update_qps_port = -1;
int port;
+ ASSERT_RTNL();
+
iboe = &ibdev->iboe;
spin_lock_bh(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- enum ib_port_state port_state = IB_PORT_NOP;
- struct net_device *old_master = iboe->masters[port - 1];
- struct net_device *curr_netdev;
- struct net_device *curr_master;
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (iboe->netdevs[port - 1])
- mlx4_ib_set_default_gid(ibdev,
- iboe->netdevs[port - 1], port);
- curr_netdev = iboe->netdevs[port - 1];
-
- if (iboe->netdevs[port - 1] &&
- netif_is_bond_slave(iboe->netdevs[port - 1])) {
- iboe->masters[port - 1] = netdev_master_upper_dev_get(
- iboe->netdevs[port - 1]);
- } else {
- iboe->masters[port - 1] = NULL;
- }
- curr_master = iboe->masters[port - 1];
if (dev == iboe->netdevs[port - 1] &&
(event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
event == NETDEV_UP || event == NETDEV_CHANGE))
update_qps_port = port;
- if (curr_netdev) {
- port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
- IB_PORT_ACTIVE : IB_PORT_DOWN;
- mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
- if (curr_master) {
- /* if using bonding/team and a slave port is down, we
- * don't want the bond IP based gids in the table since
- * flows that select port by gid may get the down port.
- */
- if (port_state == IB_PORT_DOWN &&
- !mlx4_is_bonded(ibdev->dev)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev,
- curr_netdev,
- port);
- } else {
- /* gids from the upper dev (bond/team)
- * should appear in port's gid table
- */
- mlx4_ib_get_dev_addr(curr_master,
- ibdev, port);
- }
- }
- /* if bonding is used it is possible that we add it to
- * masters only after IP address is assigned to the
- * net bonding interface.
- */
- if (curr_master && (old_master != curr_master)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev,
- curr_netdev, port);
- mlx4_ib_get_dev_addr(curr_master, ibdev, port);
- }
-
- if (!curr_master && (old_master != curr_master)) {
- reset_gid_table(ibdev, port);
- mlx4_ib_set_default_gid(ibdev,
- curr_netdev, port);
- mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
- }
- } else {
- reset_gid_table(ibdev, port);
- }
}
-
spin_unlock_bh(&iboe->lock);
if (update_qps_port > 0)
@@ -2041,77 +2067,75 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
- char name[80];
- int eq_per_port = 0;
- int added_eqs = 0;
- int total_eqs = 0;
- int i, j, eq;
+ int i, j, eq = 0, total_eqs = 0;
- /* Legacy mode or comp_pool is not large enough */
- if (dev->caps.comp_pool == 0 ||
- dev->caps.num_ports > dev->caps.comp_pool)
- return;
-
- eq_per_port = dev->caps.comp_pool / dev->caps.num_ports;
-
- /* Init eq table */
- added_eqs = 0;
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
- added_eqs += eq_per_port;
-
- total_eqs = dev->caps.num_comp_vectors + added_eqs;
-
- ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
+ ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
+ sizeof(ibdev->eq_table[0]), GFP_KERNEL);
if (!ibdev->eq_table)
return;
- ibdev->eq_added = added_eqs;
-
- eq = 0;
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
- for (j = 0; j < eq_per_port; j++) {
- snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
- i, j, dev->persist->pdev->bus->name);
- /* Set IRQ for specific name (per ring) */
- if (mlx4_assign_eq(dev, name, NULL,
- &ibdev->eq_table[eq])) {
- /* Use legacy (same as mlx4_en driver) */
- pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
- ibdev->eq_table[eq] =
- (eq % dev->caps.num_comp_vectors);
- }
- eq++;
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
+ j++, total_eqs++) {
+ if (i > 1 && mlx4_is_eq_shared(dev, total_eqs))
+ continue;
+ ibdev->eq_table[eq] = total_eqs;
+ if (!mlx4_assign_eq(dev, i,
+ &ibdev->eq_table[eq]))
+ eq++;
+ else
+ ibdev->eq_table[eq] = -1;
}
}
- /* Fill the reset of the vector with legacy EQ */
- for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
- ibdev->eq_table[eq++] = i;
+ for (i = eq; i < dev->caps.num_comp_vectors;
+ ibdev->eq_table[i++] = -1)
+ ;
/* Advertise the new number of EQs to clients */
- ibdev->ib_dev.num_comp_vectors = total_eqs;
+ ibdev->ib_dev.num_comp_vectors = eq;
}
static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
int i;
+ int total_eqs = ibdev->ib_dev.num_comp_vectors;
- /* no additional eqs were added */
+ /* no eqs were allocated */
if (!ibdev->eq_table)
return;
/* Reset the advertised EQ number */
- ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
+ ibdev->ib_dev.num_comp_vectors = 0;
- /* Free only the added eqs */
- for (i = 0; i < ibdev->eq_added; i++) {
- /* Don't free legacy eqs if used */
- if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
- continue;
+ for (i = 0; i < total_eqs; i++)
mlx4_release_eq(dev, ibdev->eq_table[i]);
- }
kfree(ibdev->eq_table);
+ ibdev->eq_table = NULL;
+}
+
+static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ err = mlx4_ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ else
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
}
static void *mlx4_ib_add(struct mlx4_dev *dev)
@@ -2123,6 +2147,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
struct mlx4_ib_iboe *iboe;
int ib_num_ports = 0;
int num_req_counters;
+ int allocated;
+ u32 counter_index;
+ struct counter_index *new_counter_index = NULL;
pr_info_once("%s", mlx4_ib_version);
@@ -2167,6 +2194,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
+ ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
+ ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
+ ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2235,12 +2265,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
- ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
- ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
- ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
+ ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
+ ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
+ ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
+ ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
@@ -2278,6 +2309,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
}
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
@@ -2285,22 +2321,58 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (init_node_data(ibdev))
goto err_map;
+ for (i = 0; i < ibdev->num_ports; ++i) {
+ mutex_init(&ibdev->counters_table[i].mutex);
+ INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ }
+
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
for (i = 0; i < num_req_counters; ++i) {
mutex_init(&ibdev->qp1_proxy_lock[i]);
+ allocated = 0;
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
- err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
+ err = mlx4_counter_alloc(ibdev->dev, &counter_index);
+ /* if failed to allocate a new counter, use default */
if (err)
- ibdev->counters[i] = -1;
- } else {
- ibdev->counters[i] = -1;
+ counter_index =
+ mlx4_get_default_counter_index(dev,
+ i + 1);
+ else
+ allocated = 1;
+ } else { /* IB_LINK_LAYER_INFINIBAND use the default counter */
+ counter_index = mlx4_get_default_counter_index(dev,
+ i + 1);
}
+ new_counter_index = kmalloc(sizeof(*new_counter_index),
+ GFP_KERNEL);
+ if (!new_counter_index) {
+ if (allocated)
+ mlx4_counter_free(ibdev->dev, counter_index);
+ goto err_counter;
+ }
+ new_counter_index->index = counter_index;
+ new_counter_index->allocated = allocated;
+ list_add_tail(&new_counter_index->list,
+ &ibdev->counters_table[i].counters_list);
+ ibdev->counters_table[i].default_counter = counter_index;
+ pr_info("counter index %d for port %d allocated %d\n",
+ counter_index, i + 1, allocated);
}
if (mlx4_is_bonded(dev))
- for (i = 1; i < ibdev->num_ports ; ++i)
- ibdev->counters[i] = ibdev->counters[0];
-
+ for (i = 1; i < ibdev->num_ports ; ++i) {
+ new_counter_index =
+ kmalloc(sizeof(struct counter_index),
+ GFP_KERNEL);
+ if (!new_counter_index)
+ goto err_counter;
+ new_counter_index->index = counter_index;
+ new_counter_index->allocated = 0;
+ list_add_tail(&new_counter_index->list,
+ &ibdev->counters_table[i].counters_list);
+ ibdev->counters_table[i].default_counter =
+ counter_index;
+ }
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
@@ -2360,26 +2432,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
- if (!iboe->nb_inet.notifier_call) {
- iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
- err = register_inetaddr_notifier(&iboe->nb_inet);
- if (err) {
- iboe->nb_inet.notifier_call = NULL;
- goto err_notif;
- }
- }
-#if IS_ENABLED(CONFIG_IPV6)
- if (!iboe->nb_inet6.notifier_call) {
- iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
- err = register_inet6addr_notifier(&iboe->nb_inet6);
- if (err) {
- iboe->nb_inet6.notifier_call = NULL;
- goto err_notif;
- }
- }
-#endif
- if (mlx4_ib_init_gid_table(ibdev))
- goto err_notif;
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -2410,18 +2462,6 @@ err_notif:
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
- if (ibdev->iboe.nb_inet.notifier_call) {
- if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet.notifier_call = NULL;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- if (ibdev->iboe.nb_inet6.notifier_call) {
- if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet6.notifier_call = NULL;
- }
-#endif
flush_workqueue(wq);
mlx4_ib_close_sriov(ibdev);
@@ -2440,9 +2480,8 @@ err_steer_qp_release:
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
err_counter:
- for (; i; --i)
- if (ibdev->counters[i - 1] != -1)
- mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
+ for (i = 0; i < ibdev->num_ports; ++i)
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
err_map:
iounmap(ibdev->uar_map);
@@ -2545,23 +2584,10 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
kfree(ibdev->ib_uc_qpns_bitmap);
}
- if (ibdev->iboe.nb_inet.notifier_call) {
- if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet.notifier_call = NULL;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- if (ibdev->iboe.nb_inet6.notifier_call) {
- if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
- pr_warn("failure unregistering notifier\n");
- ibdev->iboe.nb_inet6.notifier_call = NULL;
- }
-#endif
-
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
- if (ibdev->counters[p] != -1)
- mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
+ mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
+
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
@@ -2592,31 +2618,33 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
if (!dm) {
pr_err("failed to allocate memory for tunneling qp update\n");
- goto out;
+ return;
}
for (i = 0; i < ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
pr_err("failed to allocate memory for tunneling qp update work struct\n");
- for (i = 0; i < dev->caps.num_ports; i++) {
- if (dm[i])
- kfree(dm[i]);
- }
+ while (--i >= 0)
+ kfree(dm[i]);
goto out;
}
- }
- /* initialize or tear down tunnel QPs for the slave */
- for (i = 0; i < ports; i++) {
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
dm[i]->port = first_port + i + 1;
dm[i]->slave = slave;
dm[i]->do_init = do_init;
dm[i]->dev = ibdev;
- spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
- if (!ibdev->sriov.is_going_down)
+ }
+ /* initialize or tear down tunnel QPs for the slave */
+ spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
+ if (!ibdev->sriov.is_going_down) {
+ for (i = 0; i < ports; i++)
queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+ } else {
+ spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+ for (i = 0; i < ports; i++)
+ kfree(dm[i]);
}
out:
kfree(dm);