diff options
Diffstat (limited to 'kernel/drivers/net/ethernet/mellanox')
75 files changed, 23590 insertions, 1108 deletions
diff --git a/kernel/drivers/net/ethernet/mellanox/Kconfig b/kernel/drivers/net/ethernet/mellanox/Kconfig index 8cf7563a8..d54701047 100644 --- a/kernel/drivers/net/ethernet/mellanox/Kconfig +++ b/kernel/drivers/net/ethernet/mellanox/Kconfig @@ -7,9 +7,7 @@ config NET_VENDOR_MELLANOX default y depends on PCI ---help--- - If you have a network (Ethernet) card belonging to this class, say Y - and read the Ethernet-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. + If you have a network (Ethernet) card belonging to this class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all @@ -20,5 +18,6 @@ if NET_VENDOR_MELLANOX source "drivers/net/ethernet/mellanox/mlx4/Kconfig" source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig" +source "drivers/net/ethernet/mellanox/mlxsw/Kconfig" endif # NET_VENDOR_MELLANOX diff --git a/kernel/drivers/net/ethernet/mellanox/Makefile b/kernel/drivers/net/ethernet/mellanox/Makefile index 38fe32ef5..2e2a5ec50 100644 --- a/kernel/drivers/net/ethernet/mellanox/Makefile +++ b/kernel/drivers/net/ethernet/mellanox/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_MLX4_CORE) += mlx4/ obj-$(CONFIG_MLX5_CORE) += mlx5/core/ +obj-$(CONFIG_MLXSW_CORE) += mlxsw/ diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/cmd.c b/kernel/drivers/net/ethernet/mellanox/mlx4/cmd.c index 529ef0594..d48d57934 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -49,6 +49,7 @@ #include "mlx4.h" #include "fw.h" #include "fw_qos.h" +#include "mlx4_stats.h" #define CMD_POLL_TOKEN 0xffff #define INBOX_MASK 0xffffffffffffff00ULL @@ -685,6 +686,7 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; + long ret_wait; int err = 0; down(&cmd->event_sem); @@ -710,8 +712,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (err) goto out_reset; - if (!wait_for_completion_timeout(&context->done, - msecs_to_jiffies(timeout))) { + if (op == MLX4_CMD_SENSE_PORT) { + ret_wait = + wait_for_completion_interruptible_timeout(&context->done, + msecs_to_jiffies(timeout)); + if (ret_wait < 0) { + context->fw_status = 0; + context->out_param = 0; + context->result = 0; + } + } else { + ret_wait = (long)wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout)); + } + if (!ret_wait) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); if (op == MLX4_CMD_NOP) { @@ -882,7 +896,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, { struct ib_smp *smp = inbox->buf; u32 index; - u8 port; + u8 port, slave_port; u8 opcode_modifier; u16 *table; int err; @@ -894,7 +908,8 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, __be32 slave_cap_mask; __be64 slave_node_guid; - port = vhcr->in_modifier; + slave_port = vhcr->in_modifier; + port = mlx4_slave_convert_port(dev, slave, slave_port); /* network-view bit is for driver use only, and should not be passed to FW */ opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */ @@ -930,8 +945,9 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { /*get the slave specific caps:*/ /*do the command */ + smp->attr_mod = cpu_to_be32(port); err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, opcode_modifier, + port, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); /* modify the response for slaves */ if (!err && slave != mlx4_master_func_num(dev)) { @@ -975,7 +991,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, opcode_modifier, + port, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { slave_node_guid = mlx4_get_slave_node_guid(dev, slave); @@ -994,7 +1010,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->method == IB_MGMT_METHOD_GET) || network_view) { mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n", - slave, smp->method, smp->mgmt_class, + slave, smp->mgmt_class, smp->method, network_view ? "Network" : "Host", be16_to_cpu(smp->attr_id)); return -EPERM; @@ -2382,7 +2398,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } } - memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); + memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); @@ -2915,7 +2931,7 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) port = mlx4_slaves_closest_port(dev, slave, port); s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->mac = mac; - mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n", + mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n", vf, port, s_info->mac); return 0; } @@ -3164,6 +3180,92 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset) +{ + struct mlx4_cmd_mailbox *mailbox = NULL; + struct mlx4_counter *tmp_counter; + int err; + u32 if_stat_in_mod; + + if (!counter_stats) + return -EINVAL; + + if (counter_index == MLX4_SINK_COUNTER_INDEX(dev)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, sizeof(struct mlx4_counter)); + if_stat_in_mod = counter_index; + if (reset) + if_stat_in_mod |= MLX4_QUERY_IF_STAT_RESET; + err = mlx4_cmd_box(dev, 0, mailbox->dma, + if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", + __func__, counter_index); + goto if_stat_out; + } + tmp_counter = (struct mlx4_counter *)mailbox->buf; + counter_stats->counter_mode = tmp_counter->counter_mode; + if (counter_stats->counter_mode == 0) { + counter_stats->rx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->rx_frames) + + be64_to_cpu(tmp_counter->rx_frames)); + counter_stats->tx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->tx_frames) + + be64_to_cpu(tmp_counter->tx_frames)); + counter_stats->rx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->rx_bytes) + + be64_to_cpu(tmp_counter->rx_bytes)); + counter_stats->tx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->tx_bytes) + + be64_to_cpu(tmp_counter->tx_bytes)); + } + +if_stat_out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_counter_stats); + +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_counter tmp_vf_stats; + int slave; + int err = 0; + + if (!vf_stats) + return -EINVAL; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf_idx); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + err = mlx4_calc_vf_counters(dev, slave, port, &tmp_vf_stats); + if (!err && tmp_vf_stats.counter_mode == 0) { + vf_stats->rx_packets = be64_to_cpu(tmp_vf_stats.rx_frames); + vf_stats->tx_packets = be64_to_cpu(tmp_vf_stats.tx_frames); + vf_stats->rx_bytes = be64_to_cpu(tmp_vf_stats.rx_bytes); + vf_stats->tx_bytes = be64_to_cpu(tmp_vf_stats.tx_bytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_stats); + int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -3197,6 +3299,12 @@ int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, int enabled) { struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + int min_port = find_first_bit(actv_ports.ports, + priv->dev.caps.num_ports) + 1; + int max_port = min_port - 1 + + bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports); if (slave == mlx4_master_func_num(dev)) return 0; @@ -3206,6 +3314,11 @@ int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, enabled < 0 || enabled > 1) return -EINVAL; + if (min_port == max_port && dev->caps.num_ports > 1) { + mlx4_info(dev, "SMI access disallowed for single ported VFs\n"); + return -EPROTONOSUPPORT; + } + priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled; return 0; } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/cq.c b/kernel/drivers/net/ethernet/mellanox/mlx4/cq.c index e71f31387..3348e646d 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -292,7 +292,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, u64 mtt_addr; int err; - if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) + if (vector >= dev->caps.num_comp_vectors) return -EINVAL; cq->vector = vector; @@ -319,7 +319,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq_context->flags |= cpu_to_be32(1 << 19); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; + cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -339,11 +339,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, init_completion(&cq->free); cq->comp = mlx4_add_cq_to_tasklet; cq->tasklet_ctx.priv = - &priv->eq_table.eq[cq->vector].tasklet_ctx; + &priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); - cq->irq = priv->eq_table.eq[cq->vector].irq; + cq->irq = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].irq; return 0; err_radix: @@ -368,7 +368,10 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[cq->vector].irq); + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); + if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != + priv->eq_table.eq[MLX4_EQ_ASYNC].irq) + synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 8a083d73e..1494997c4 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -236,23 +236,43 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; +#define MLX4_EN_WRAP_AROUND_SEC 10ULL + +/* This function calculates the max shift that enables the user range + * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. + */ +static u32 freq_to_shift(u16 freq) +{ + u32 freq_khz = freq * 1000; + u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? + max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + /* calculate max possible multiplier in order to fit in 64bit */ + u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + + /* This comes from the reverse of clocksource_khz2mult */ + return ilog2(div_u64(max_mul * freq_khz, 1000000)); +} + void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; unsigned long flags; u64 ns, zero = 0; + /* mlx4_en_init_timestamp is called for each netdev. + * mdev->ptp_clock is common for all ports, skip initialization if + * was done for other port. + */ + if (mdev->ptp_clock) + return; + rwlock_init(&mdev->clock_lock); memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; mdev->cycles.mask = CLOCKSOURCE_MASK(48); - /* Using shift to make calculation more accurate. Since current HW - * clock frequency is 427 MHz, and cycles are given using a 48 bits - * register, the biggest shift when calculating using u64, is 14 - * (max_cycles * multiplier < 2^64) - */ - mdev->cycles.shift = 14; + mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); mdev->cycles.mult = clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 22da4d0d0..eb8a4988d 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -66,6 +66,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->ring = ring; cq->is_tx = mode; + cq->vector = mdev->dev->caps.num_comp_vectors; /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. @@ -99,14 +100,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, { struct mlx4_en_dev *mdev = priv->mdev; int err = 0; - char name[25]; int timestamp_en = 0; - struct cpu_rmap *rmap = -#ifdef CONFIG_RFS_ACCEL - priv->dev->rx_cpu_rmap; -#else - NULL; -#endif + bool assigned_eq = false; cq->dev = mdev->pndev[priv->port]; cq->mcq.set_ci_db = cq->wqres.db.db; @@ -116,23 +111,19 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, memset(cq->buf, 0, cq->buf_size); if (cq->is_tx == RX) { - if (mdev->dev->caps.comp_pool) { - if (!cq->vector) { - sprintf(name, "%s-%d", priv->dev->name, - cq->ring); - /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector)) { - cq->vector = (cq->ring + 1 + priv->port) - % mdev->dev->caps.num_comp_vectors; - mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", - name); - } - + if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port, + cq->vector)) { + cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask); + + err = mlx4_assign_eq(mdev->dev, priv->port, + &cq->vector); + if (err) { + mlx4_err(mdev, "Failed assigning an EQ to CQ vector %d\n", + cq->vector); + goto free_eq; } - } else { - cq->vector = (cq->ring + 1 + priv->port) % - mdev->dev->caps.num_comp_vectors; + + assigned_eq = true; } cq->irq_desc = @@ -159,7 +150,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); if (err) - return err; + goto free_eq; cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; @@ -168,13 +159,6 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, NAPI_POLL_WEIGHT); } else { - struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; - - err = irq_set_affinity_hint(cq->mcq.irq, - ring->affinity_mask); - if (err) - mlx4_warn(mdev, "Failed setting affinity hint\n"); - netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); napi_hash_add(&cq->napi); } @@ -182,6 +166,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, napi_enable(&cq->napi); return 0; + +free_eq: + if (assigned_eq) + mlx4_release_eq(mdev->dev, cq->vector); + cq->vector = mdev->dev->caps.num_comp_vectors; + return err; } void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) @@ -191,9 +181,9 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); - if (priv->mdev->dev->caps.comp_pool && cq->vector) { + if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && + cq->is_tx == RX) mlx4_release_eq(priv->mdev->dev, cq->vector); - } cq->vector = 0; cq->buf_size = 0; cq->buf = NULL; @@ -207,7 +197,6 @@ void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) if (!cq->is_tx) { napi_hash_del(&cq->napi); synchronize_rcu(); - irq_set_affinity_hint(cq->mcq.irq, NULL); } netif_napi_del(&cq->napi); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index a2ddf3d75..ddb554188 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -95,13 +95,11 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) (u16) (mdev->dev->caps.fw_ver & 0xffff)); strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), sizeof(drvinfo->bus_info)); - drvinfo->n_stats = 0; - drvinfo->regdump_len = 0; - drvinfo->eedump_len = 0; } static const char mlx4_en_priv_flags[][ETH_GSTRING_LEN] = { "blueflame", + "phv-bit" }; static const char main_strings[][ETH_GSTRING_LEN] = { @@ -119,6 +117,12 @@ static const char main_strings[][ETH_GSTRING_LEN] = { "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", + /* pf statistics */ + "pf_rx_packets", + "pf_rx_bytes", + "pf_tx_packets", + "pf_tx_bytes", + /* priority flow control statistics rx */ "rx_pause_prio_0", "rx_pause_duration_prio_0", "rx_pause_transition_prio_0", @@ -368,6 +372,11 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, if (bitmap_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->port_stats)[i]; + for (i = 0; i < NUM_PF_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = + ((unsigned long *)&priv->pf_stats)[i]; + for (i = 0; i < NUM_FLOW_PRIORITY_STATS_RX; i++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) @@ -448,6 +457,12 @@ static void mlx4_en_get_strings(struct net_device *dev, strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[strings]); + for (i = 0; i < NUM_PF_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + for (i = 0; i < NUM_FLOW_STATS; i++, strings++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) @@ -1780,35 +1795,49 @@ static int mlx4_en_get_ts_info(struct net_device *dev, static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; bool bf_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_BLUEFLAME); bool bf_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_BLUEFLAME); + bool phv_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_PHV); + bool phv_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_PHV); int i; + int ret = 0; - if (bf_enabled_new == bf_enabled_old) - return 0; /* Nothing to do */ + if (bf_enabled_new != bf_enabled_old) { + if (bf_enabled_new) { + bool bf_supported = true; - if (bf_enabled_new) { - bool bf_supported = true; + for (i = 0; i < priv->tx_ring_num; i++) + bf_supported &= priv->tx_ring[i]->bf_alloced; - for (i = 0; i < priv->tx_ring_num; i++) - bf_supported &= priv->tx_ring[i]->bf_alloced; + if (!bf_supported) { + en_err(priv, "BlueFlame is not supported\n"); + return -EINVAL; + } - if (!bf_supported) { - en_err(priv, "BlueFlame is not supported\n"); - return -EINVAL; + priv->pflags |= MLX4_EN_PRIV_FLAGS_BLUEFLAME; + } else { + priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME; } - priv->pflags |= MLX4_EN_PRIV_FLAGS_BLUEFLAME; - } else { - priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME; - } - - for (i = 0; i < priv->tx_ring_num; i++) - priv->tx_ring[i]->bf_enabled = bf_enabled_new; + for (i = 0; i < priv->tx_ring_num; i++) + priv->tx_ring[i]->bf_enabled = bf_enabled_new; - en_info(priv, "BlueFlame %s\n", - bf_enabled_new ? "Enabled" : "Disabled"); + en_info(priv, "BlueFlame %s\n", + bf_enabled_new ? "Enabled" : "Disabled"); + } + if (phv_enabled_new != phv_enabled_old) { + ret = set_phv_bit(mdev->dev, priv->port, (int)phv_enabled_new); + if (ret) + return ret; + else if (phv_enabled_new) + priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; + else + priv->pflags &= ~MLX4_EN_PRIV_FLAGS_PHV; + en_info(priv, "PHV bit %s\n", + phv_enabled_new ? "Enabled" : "Disabled"); + } return 0; } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_main.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_main.c index 913b716ed..e0ec280a7 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -123,6 +123,28 @@ void mlx4_en_update_loopback_state(struct net_device *dev, */ if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback) priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK; + + mutex_lock(&priv->mdev->state_lock); + if (priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB && + priv->rss_map.indir_qp.qpn) { + int i; + int err = 0; + int loopback = !!(features & NETIF_F_LOOPBACK); + + for (i = 0; i < priv->rx_ring_num; i++) { + int ret; + + ret = mlx4_en_change_mcast_lb(priv, + &priv->rss_map.qps[i], + loopback); + if (!err) + err = ret; + } + if (err) + mlx4_warn(priv->mdev, "failed to change mcast loopback\n"); + } + mutex_unlock(&priv->mdev->state_lock); } static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) @@ -210,9 +232,6 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) if (mdev->pndev[i]) mlx4_en_destroy_netdev(mdev->pndev[i]); - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - mlx4_en_remove_timestamp(mdev); - flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); (void) mlx4_mr_free(dev, &mdev->mr); @@ -224,6 +243,26 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) kfree(mdev); } +static void mlx4_en_activate(struct mlx4_dev *dev, void *ctx) +{ + int i; + struct mlx4_en_dev *mdev = ctx; + + /* Create a netdev for each port */ + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + mlx4_info(mdev, "Activating port:%d\n", i); + if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) + mdev->pndev[i] = NULL; + } + + /* register notifier */ + mdev->nb.notifier_call = mlx4_en_netdev_event; + if (register_netdevice_notifier(&mdev->nb)) { + mdev->nb.notifier_call = NULL; + mlx4_err(mdev, "Failed to create notifier\n"); + } +} + static void *mlx4_en_add(struct mlx4_dev *dev) { struct mlx4_en_dev *mdev; @@ -278,10 +317,6 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) mdev->port_cnt++; - /* Initialize time stamp mechanism */ - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - mlx4_en_init_timestamp(mdev); - /* Set default number of RX rings*/ mlx4_en_set_num_rx_rings(mdev); @@ -297,21 +332,6 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mutex_init(&mdev->state_lock); mdev->device_up = true; - /* Setup ports */ - - /* Create a netdev for each port */ - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { - mlx4_info(mdev, "Activating port:%d\n", i); - if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) - mdev->pndev[i] = NULL; - } - /* register notifier */ - mdev->nb.notifier_call = mlx4_en_netdev_event; - if (register_netdevice_notifier(&mdev->nb)) { - mdev->nb.notifier_call = NULL; - mlx4_err(mdev, "Failed to create notifier\n"); - } - return mdev; err_mr: @@ -335,6 +355,7 @@ static struct mlx4_interface mlx4_en_interface = { .event = mlx4_en_event, .get_dev = mlx4_en_get_netdev, .protocol = MLX4_PROT_ETH, + .activate = mlx4_en_activate, }; static void mlx4_en_verify_params(void) diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a5a0b8420..67e9633ea 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -573,10 +573,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; - struct mlx4_mac_entry *entry; int index = 0; int err = 0; - u64 reg_id = 0; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr); @@ -600,44 +598,11 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); - goto qp_err; + mlx4_unregister_mac(dev, priv->port, mac); + return err; } - err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, ®_id); - if (err) - goto steer_err; - - err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn, - &priv->tunnel_reg_id); - if (err) - goto tunnel_err; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - err = -ENOMEM; - goto alloc_err; - } - memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac)); - memcpy(priv->current_mac, entry->mac, sizeof(priv->current_mac)); - entry->reg_id = reg_id; - - hlist_add_head_rcu(&entry->hlist, - &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]); - return 0; - -alloc_err: - if (priv->tunnel_reg_id) - mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); -tunnel_err: - mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id); - -steer_err: - mlx4_qp_release_range(dev, *qpn, 1); - -qp_err: - mlx4_unregister_mac(dev, priv->port, mac); - return err; } static void mlx4_en_put_qp(struct mlx4_en_priv *priv) @@ -645,39 +610,13 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; - u64 mac; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { - mac = mlx4_mac_to_u64(priv->dev->dev_addr); + u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", priv->dev->dev_addr); mlx4_unregister_mac(dev, priv->port, mac); } else { - struct mlx4_mac_entry *entry; - struct hlist_node *tmp; - struct hlist_head *bucket; - unsigned int i; - - for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { - bucket = &priv->mac_hash[i]; - hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { - mac = mlx4_mac_to_u64(entry->mac); - en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", - entry->mac); - mlx4_en_uc_steer_release(priv, entry->mac, - qpn, entry->reg_id); - - mlx4_unregister_mac(dev, priv->port, mac); - hlist_del_rcu(&entry->hlist); - kfree_rcu(entry, rcu); - } - } - - if (priv->tunnel_reg_id) { - mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); - priv->tunnel_reg_id = 0; - } - en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", priv->port, qpn); mlx4_qp_release_range(dev, qpn, 1); @@ -1283,6 +1222,75 @@ static void mlx4_en_netpoll(struct net_device *dev) } #endif +static int mlx4_en_set_rss_steer_rules(struct mlx4_en_priv *priv) +{ + u64 reg_id; + int err = 0; + int *qpn = &priv->base_qpn; + struct mlx4_mac_entry *entry; + + err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, ®_id); + if (err) + return err; + + err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn, + &priv->tunnel_reg_id); + if (err) + goto tunnel_err; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto alloc_err; + } + + memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac)); + memcpy(priv->current_mac, entry->mac, sizeof(priv->current_mac)); + entry->reg_id = reg_id; + hlist_add_head_rcu(&entry->hlist, + &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]); + + return 0; + +alloc_err: + if (priv->tunnel_reg_id) + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + +tunnel_err: + mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id); + return err; +} + +static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) +{ + u64 mac; + unsigned int i; + int qpn = priv->base_qpn; + struct hlist_head *bucket; + struct hlist_node *tmp; + struct mlx4_mac_entry *entry; + + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { + bucket = &priv->mac_hash[i]; + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { + mac = mlx4_mac_to_u64(entry->mac); + en_dbg(DRV, priv, "Registering MAC:%pM for deleting\n", + entry->mac); + mlx4_en_uc_steer_release(priv, entry->mac, + qpn, entry->reg_id); + + mlx4_unregister_mac(priv->mdev->dev, priv->port, mac); + hlist_del_rcu(&entry->hlist); + kfree_rcu(entry, rcu); + } + } + + if (priv->tunnel_reg_id) { + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + priv->tunnel_reg_id = 0; + } +} + static void mlx4_en_tx_timeout(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1597,6 +1605,9 @@ int mlx4_en_start_port(struct net_device *dev) } mdev->mac_removed[priv->port] = 0; + priv->counter_index = + mlx4_get_default_counter_index(mdev->dev, priv->port); + err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); @@ -1681,6 +1692,11 @@ int mlx4_en_start_port(struct net_device *dev) goto tx_err; } + /* Set Unicast and VXLAN steering rules */ + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0 && + mlx4_en_set_rss_steer_rules(priv)) + mlx4_warn(mdev, "Failed setting steering rules\n"); + /* Attach rx QP to bradcast address */ eth_broadcast_addr(&mc_list[10]); mc_list[5] = priv->port; /* needed for B0 steering support */ @@ -1755,6 +1771,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Set port as not active */ priv->port_up = false; + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == @@ -1827,6 +1844,9 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) + mlx4_en_delete_rss_steer_rules(priv); + /* Free RSS qps */ mlx4_en_release_rss_steer(priv); @@ -1891,6 +1911,7 @@ static void mlx4_en_clear_stats(struct net_device *dev) sizeof(priv->rx_priority_flowstats)); memset(&priv->tx_priority_flowstats, 0, sizeof(priv->tx_priority_flowstats)); + memset(&priv->pf_stats, 0, sizeof(priv->pf_stats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; @@ -1954,7 +1975,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) int i; #ifdef CONFIG_RFS_ACCEL - free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; #endif @@ -2008,11 +2028,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) } #ifdef CONFIG_RFS_ACCEL - if (priv->mdev->dev->caps.comp_pool) { - priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool); - if (!priv->dev->rx_cpu_rmap) - goto err; - } + priv->dev->rx_cpu_rmap = mlx4_get_cpu_rmap(priv->mdev->dev, priv->port); #endif return 0; @@ -2056,6 +2072,9 @@ void mlx4_en_destroy_netdev(struct net_device *dev) /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_remove_timestamp(mdev); + /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; @@ -2184,6 +2203,25 @@ static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } +static netdev_features_t mlx4_en_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx4_en_priv *en_priv = netdev_priv(netdev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + /* Since there is no support for separate RX C-TAG/S-TAG vlan accel + * enable/disable make sure S-TAG flag is always in same state as + * C-TAG. + */ + if (features & NETIF_F_HW_VLAN_CTAG_RX && + !(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) + features |= NETIF_F_HW_VLAN_STAG_RX; + else + features &= ~NETIF_F_HW_VLAN_STAG_RX; + + return features; +} + static int mlx4_en_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2218,6 +2256,10 @@ static int mlx4_en_set_features(struct net_device *netdev, en_info(priv, "Turn %s TX vlan strip offload\n", (features & NETIF_F_HW_VLAN_CTAG_TX) ? "ON" : "OFF"); + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_STAG_TX)) + en_info(priv, "Turn %s TX S-VLAN strip offload\n", + (features & NETIF_F_HW_VLAN_STAG_TX) ? "ON" : "OFF"); + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_LOOPBACK)) { en_info(priv, "Turn %s loopback\n", (features & NETIF_F_LOOPBACK) ? "ON" : "OFF"); @@ -2288,6 +2330,15 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state); } +static int mlx4_en_get_vf_stats(struct net_device *dev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_get_vf_stats(mdev->dev, en_priv->port, vf, vf_stats); +} + #define PORT_ID_BYTE_LEN 8 static int mlx4_en_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid) @@ -2330,8 +2381,6 @@ out: /* set offloads */ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2342,8 +2391,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) /* unset offloads */ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); - priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2451,6 +2498,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_poll_controller = mlx4_en_netpoll, #endif .ndo_set_features = mlx4_en_set_features, + .ndo_fix_features = mlx4_en_fix_features, .ndo_setup_tc = mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, @@ -2485,11 +2533,13 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_set_vf_rate = mlx4_en_set_vf_rate, .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, + .ndo_get_vf_stats = mlx4_en_get_vf_stats, .ndo_get_vf_config = mlx4_en_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, #endif .ndo_set_features = mlx4_en_set_features, + .ndo_fix_features = mlx4_en_fix_features, .ndo_setup_tc = mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, @@ -2682,7 +2732,7 @@ void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev, u8 rx_ppp, u8 rx_pause, u8 tx_ppp, u8 tx_pause) { - int last_i = NUM_MAIN_STATS + NUM_PORT_STATS; + int last_i = NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PF_STATS; if (!mlx4_is_slave(dev) && (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN)) { @@ -2744,6 +2794,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, bitmap_set(stats_bitmap->bitmap, last_i, NUM_PORT_STATS); last_i += NUM_PORT_STATS; + if (mlx4_is_master(dev)) + bitmap_set(stats_bitmap->bitmap, last_i, + NUM_PF_STATS); + last_i += NUM_PF_STATS; + mlx4_en_update_pfc_stats_bitmap(dev, stats_bitmap, rx_ppp, rx_pause, tx_ppp, tx_pause); @@ -2760,7 +2815,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_priv *priv; int i; int err; - u64 mac_u64; dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv), MAX_TX_RINGS, MAX_RX_RINGS); @@ -2779,6 +2833,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv = netdev_priv(dev); memset(priv, 0, sizeof(struct mlx4_en_priv)); + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); @@ -2851,17 +2906,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->addr_len = ETH_ALEN; mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]); if (!is_valid_ether_addr(dev->dev_addr)) { - if (mlx4_is_slave(priv->mdev->dev)) { - eth_hw_addr_random(dev); - en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr); - mac_u64 = mlx4_mac_to_u64(dev->dev_addr); - mdev->dev->caps.def_mac[priv->port] = mac_u64; - } else { - en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n", - priv->port, dev->dev_addr); - err = -EINVAL; - goto out; - } + en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n", + priv->port, dev->dev_addr); + err = -EINVAL; + goto out; + } else if (mlx4_is_slave(priv->mdev->dev) && + (priv->mdev->dev->port_random_macs & 1 << priv->port)) { + /* Random MAC was assigned in mlx4_slave_cap + * in mlx4_core module + */ + dev->addr_assign_type |= NET_ADDR_RANDOM; + en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr); } memcpy(priv->current_mac, dev->dev_addr, sizeof(priv->current_mac)); @@ -2915,6 +2970,27 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { + dev->features |= NETIF_F_HW_VLAN_STAG_RX | + NETIF_F_HW_VLAN_STAG_FILTER; + dev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; + } + + if (mlx4_is_slave(mdev->dev)) { + int phv; + + err = get_phv_bit(mdev->dev, port, &phv); + if (!err && phv) { + dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; + } + } else { + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && + !(mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) + dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + } + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) dev->hw_features |= NETIF_F_RXFCS; @@ -2940,6 +3016,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + mdev->pndev[port] = dev; mdev->upper[port] = NULL; @@ -2981,9 +3062,12 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + /* Initialize time stamp mechanism */ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - queue_delayed_work(mdev->workqueue, &priv->service_task, - SERVICE_TASK_DELAY); + mlx4_en_init_timestamp(mdev); + + queue_delayed_work(mdev->workqueue, &priv->service_task, + SERVICE_TASK_DELAY); mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap, mdev->profile.prof[priv->port].rx_ppp, diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_port.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_port.c index 0a56f010c..3904b5fc0 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -149,6 +149,7 @@ static unsigned long en_stats_adder(__be64 *start, __be64 *next, int num) int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) { + struct mlx4_counter tmp_counter_stats; struct mlx4_en_stat_out_mbox *mlx4_en_stats; struct mlx4_en_stat_out_flow_control_mbox *flowstats; struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); @@ -156,7 +157,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) struct mlx4_cmd_mailbox *mailbox; u64 in_mod = reset << 8 | port; int err; - int i; + int i, counter_index; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) @@ -202,6 +203,20 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.tso_packets += ring->tso_packets; priv->port_stats.xmit_more += ring->xmit_more; } + if (mlx4_is_master(mdev->dev)) { + stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0, + &mlx4_en_stats->RTOT_prio_1, + NUM_PRIORITIES); + stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0, + &mlx4_en_stats->TTOT_prio_1, + NUM_PRIORITIES); + stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0, + &mlx4_en_stats->ROCT_prio_1, + NUM_PRIORITIES); + stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0, + &mlx4_en_stats->TOCT_prio_1, + NUM_PRIORITIES); + } /* net device stats */ stats->rx_errors = be64_to_cpu(mlx4_en_stats->PCS) + @@ -223,11 +238,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_missed_errors = 0; stats->tx_aborted_errors = 0; stats->tx_carrier_errors = 0; stats->tx_fifo_errors = 0; @@ -296,6 +311,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) spin_unlock_bh(&priv->stats_lock); + memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats)); + counter_index = mlx4_get_default_counter_index(mdev->dev, port); + err = mlx4_get_counter_stats(mdev->dev, counter_index, + &tmp_counter_stats, reset); + /* 0xffs indicates invalid value */ memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES); @@ -314,6 +334,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) spin_lock_bh(&priv->stats_lock); + if (tmp_counter_stats.counter_mode == 0) { + priv->pf_stats.rx_bytes = be64_to_cpu(tmp_counter_stats.rx_bytes); + priv->pf_stats.tx_bytes = be64_to_cpu(tmp_counter_stats.tx_bytes); + priv->pf_stats.rx_packets = be64_to_cpu(tmp_counter_stats.rx_frames); + priv->pf_stats.tx_packets = be64_to_cpu(tmp_counter_stats.tx_frames); + } + for (i = 0; i < MLX4_NUM_PRIORITIES; i++) { priv->rx_priority_flowstats[i].rx_pause = be64_to_cpu(flowstats[i].rx_pause); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 34f2fdf4f..12aab5a65 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -66,9 +66,18 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } - context->pri_path.counter_index = 0xff; + context->pri_path.counter_index = priv->counter_index; context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); + if (!rss && + (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) && + context->pri_path.counter_index != + MLX4_SINK_COUNTER_INDEX(mdev->dev)) { + /* disable multicast loopback to qp with same counter */ + if (!(dev->features & NETIF_F_LOOPBACK)) + context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) context->param3 |= cpu_to_be32(1 << 30); @@ -80,6 +89,22 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, } } +int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, + int loopback) +{ + int ret; + struct mlx4_update_qp_params qp_params; + + memset(&qp_params, 0, sizeof(qp_params)); + if (!loopback) + qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB; + + ret = mlx4_update_qp(priv->mdev->dev, qp->qpn, + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB, + &qp_params); + + return ret; +} int mlx4_en_map_buffer(struct mlx4_buf *buf) { diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 80aac2010..e7a5000aa 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -246,7 +246,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) { - BUG_ON((u32)(ring->prod - ring->cons) > ring->actual_size); return ring->prod == ring->cons; } @@ -337,15 +336,10 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) struct mlx4_dev *dev = mdev->dev; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { - if (!dev->caps.comp_pool) - num_of_eqs = max_t(int, MIN_RX_RINGS, - min_t(int, - dev->caps.num_comp_vectors, - DEF_RX_RINGS)); - else - num_of_eqs = min_t(int, MAX_MSIX_P_PORT, - dev->caps.comp_pool/ - dev->caps.num_ports) - 1; + num_of_eqs = max_t(int, MIN_RX_RINGS, + min_t(int, + mlx4_get_eqs_per_port(mdev->dev, i), + DEF_RX_RINGS)); num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : min_t(int, num_of_eqs, @@ -731,7 +725,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, hw_checksum = csum_unfold((__force __sum16)cqe->checksum); - if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) && + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); hdr += sizeof(struct vlan_hdr); @@ -912,17 +906,25 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud gro_skb->csum_level = 1; if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && + cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vid = be16_to_cpu(cqe->sl_vid); __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); + } else if ((be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_SVLAN_PRESENT_MASK) && + (dev->features & NETIF_F_HW_VLAN_STAG_RX)) { + __vlan_hwaccel_put_tag(gro_skb, + htons(ETH_P_8021AD), + be16_to_cpu(cqe->sl_vid)); } if (dev->features & NETIF_F_RXHASH) skb_set_hash(gro_skb, be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + (ip_summed == CHECKSUM_UNNECESSARY) ? + PKT_HASH_TYPE_L4 : + PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); skb_mark_napi_id(gro_skb, &cq->napi); @@ -968,12 +970,19 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + (ip_summed == CHECKSUM_UNNECESSARY) ? + PKT_HASH_TYPE_L4 : + PKT_HASH_TYPE_L3); if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_VLAN_PRESENT_MASK) && + MLX4_CQE_CVLAN_PRESENT_MASK) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); + else if ((be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_SVLAN_PRESENT_MASK) && + (dev->features & NETIF_F_HW_VLAN_STAG_RX)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), + be16_to_cpu(cqe->sl_vid)); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); @@ -1038,13 +1047,15 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done == budget) { - int cpu_curr; const struct cpumask *aff; + struct irq_data *idata; + int cpu_curr; INC_PERF_COUNTER(priv->pstats.napi_quota); cpu_curr = smp_processor_id(); - aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; + idata = irq_desc_get_irq_data(cq->irq_desc); + aff = irq_data_get_affinity_mask(idata); if (likely(cpumask_test_cpu(cpu_curr, aff))) return budget; @@ -1071,7 +1082,10 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN; + /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). + */ + int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN); int buf_size = 0; int i = 0; diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/kernel/drivers/net/ethernet/mellanox/mlx4/en_tx.c index c10d98f6a..4421bf546 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -718,6 +718,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) u32 index, bf_index; __be32 op_own; u16 vlan_tag = 0; + u16 vlan_proto = 0; int i_frag; int lso_header_size; void *fragptr = NULL; @@ -750,9 +751,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { vlan_tag = skb_vlan_tag_get(skb); - + vlan_proto = be16_to_cpu(skb->vlan_proto); + } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -958,8 +960,13 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->bf.offset ^= ring->bf.buf_size; } else { tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * - !!skb_vlan_tag_present(skb); + if (vlan_proto == ETH_P_8021AD) + tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + tx_desc->ctrl.ins_vlan = 0; + tx_desc->ctrl.fence_size = real_size; /* Ensure new descriptor hits memory diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/eq.c b/kernel/drivers/net/ethernet/mellanox/mlx4/eq.c index 983b1d512..603d1c3d3 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -145,7 +145,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work) struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq; struct mlx4_eqe *eqe; u8 slave; - int i; + int i, phys_port, slave_port; for (eqe = next_slave_event_eqe(slave_eq); eqe; eqe = next_slave_event_eqe(slave_eq)) { @@ -154,9 +154,20 @@ void mlx4_gen_slave_eqe(struct work_struct *work) /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { for (i = 0; i <= dev->persist->num_vfs; i++) { + phys_port = 0; + if (eqe->type == MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT && + eqe->subtype == MLX4_DEV_PMC_SUBTYPE_PORT_INFO) { + phys_port = eqe->event.port_mgmt_change.port; + slave_port = mlx4_phys_to_slave_port(dev, i, phys_port); + if (slave_port < 0) /* VF doesn't have this port */ + continue; + eqe->event.port_mgmt_change.port = slave_port; + } if (mlx4_GEN_EQE(dev, i, eqe)) mlx4_warn(dev, "Failed to generate event for slave %d\n", i); + if (phys_port) + eqe->event.port_mgmt_change.port = phys_port; } } else { if (mlx4_GEN_EQE(dev, slave, eqe)) @@ -185,7 +196,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); + memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ dma_wmb(); @@ -210,6 +221,22 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, slave_event(dev, slave, eqe); } +#if defined(CONFIG_SMP) +static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec) +{ + int hint_err; + struct mlx4_dev *dev = &priv->dev; + struct mlx4_eq *eq = &priv->eq_table.eq[vec]; + + if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) + return; + + hint_err = irq_set_affinity_hint(eq->irq, eq->affinity_mask); + if (hint_err) + mlx4_warn(dev, "irq_set_affinity_hint failed, err %d\n", hint_err); +} +#endif + int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) { struct mlx4_eqe eqe; @@ -224,7 +251,7 @@ int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; - eqe.event.port_mgmt_change.port = port; + eqe.event.port_mgmt_change.port = mlx4_phys_to_slave_port(dev, slave, port); return mlx4_GEN_EQE(dev, slave, &eqe); } @@ -241,7 +268,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; - eqe.event.port_mgmt_change.port = port; + eqe.event.port_mgmt_change.port = mlx4_phys_to_slave_port(dev, slave, port); return mlx4_GEN_EQE(dev, slave, &eqe); } @@ -251,6 +278,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change) { struct mlx4_eqe eqe; + u8 slave_port = mlx4_phys_to_slave_port(dev, slave, port); /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) @@ -259,7 +287,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; eqe.subtype = port_subtype_change; - eqe.event.port_change.port = cpu_to_be32(port << 28); + eqe.event.port_change.port = cpu_to_be32(slave_port << 28); mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, port_subtype_change, slave, port); @@ -589,6 +617,10 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { if (i == mlx4_master_func_num(dev)) continue; + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (mlx4_phys_to_slave_port(dev, i, port) << 28)); mlx4_slave_event(dev, i, eqe); } } @@ -879,8 +911,8 @@ static int mlx4_num_eq_uar(struct mlx4_dev *dev) * we need to map, take the difference of highest index and * the lowest index we'll use and add 1. */ - return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs + - dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1; + return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 - + dev->caps.reserved_eqs / 4 + 1; } static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) @@ -1069,32 +1101,21 @@ static void mlx4_free_eq(struct mlx4_dev *dev, static void mlx4_free_irqs(struct mlx4_dev *dev) { struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table; - struct mlx4_priv *priv = mlx4_priv(dev); - int i, vec; + int i; if (eq_table->have_irq) free_irq(dev->persist->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { + free_cpumask_var(eq_table->eq[i].affinity_mask); +#if defined(CONFIG_SMP) + irq_set_affinity_hint(eq_table->eq[i].irq, NULL); +#endif free_irq(eq_table->eq[i].irq, eq_table->eq + i); eq_table->eq[i].have_irq = 0; } - for (i = 0; i < dev->caps.comp_pool; i++) { - /* - * Freeing the assigned irq's - * all bits should be 0, but we need to validate - */ - if (priv->msix_ctl.pool_bm & 1ULL << i) { - /* NO need protecting*/ - vec = dev->caps.num_comp_vectors + 1 + i; - free_irq(priv->eq_table.eq[vec].irq, - &priv->eq_table.eq[vec]); - } - } - - kfree(eq_table->irq_names); } @@ -1175,76 +1196,73 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) } priv->eq_table.irq_names = - kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 + - dev->caps.comp_pool), + kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1), GFP_KERNEL); if (!priv->eq_table.irq_names) { err = -ENOMEM; - goto err_out_bitmap; + goto err_out_clr_int; } - for (i = 0; i < dev->caps.num_comp_vectors; ++i) { - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, - &priv->eq_table.eq[i]); - if (err) { - --i; - goto err_out_unmap; - } - } - - err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0, - &priv->eq_table.eq[dev->caps.num_comp_vectors]); - if (err) - goto err_out_comp; - - /*if additional completion vectors poolsize is 0 this loop will not run*/ - for (i = dev->caps.num_comp_vectors + 1; - i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) { + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { + if (i == MLX4_EQ_ASYNC) { + err = mlx4_create_eq(dev, + MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, + 0, &priv->eq_table.eq[MLX4_EQ_ASYNC]); + } else { + struct mlx4_eq *eq = &priv->eq_table.eq[i]; +#ifdef CONFIG_RFS_ACCEL + int port = find_first_bit(eq->actv_ports.ports, + dev->caps.num_ports) + 1; + + if (port <= dev->caps.num_ports) { + struct mlx4_port_info *info = + &mlx4_priv(dev)->port[port]; + + if (!info->rmap) { + info->rmap = alloc_irq_cpu_rmap( + mlx4_get_eqs_per_port(dev, port)); + if (!info->rmap) { + mlx4_warn(dev, "Failed to allocate cpu rmap\n"); + err = -ENOMEM; + goto err_out_unmap; + } + } - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, - &priv->eq_table.eq[i]); - if (err) { - --i; - goto err_out_unmap; + err = irq_cpu_rmap_add( + info->rmap, eq->irq); + if (err) + mlx4_warn(dev, "Failed adding irq rmap\n"); + } +#endif + err = mlx4_create_eq(dev, dev->caps.num_cqs - + dev->caps.reserved_cqs + + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? + i + 1 - !!(i > MLX4_EQ_ASYNC) : 0, + eq); } + if (err) + goto err_out_unmap; } - if (dev->flags & MLX4_FLAG_MSI_X) { const char *eq_name; - for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { - if (i < dev->caps.num_comp_vectors) { - snprintf(priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, - "mlx4-comp-%d@pci:%s", i, - pci_name(dev->persist->pdev)); - } else { - snprintf(priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, - "mlx4-async@pci:%s", - pci_name(dev->persist->pdev)); - } + snprintf(priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, + "mlx4-async@pci:%s", + pci_name(dev->persist->pdev)); + eq_name = priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE; - eq_name = priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE; - err = request_irq(priv->eq_table.eq[i].irq, - mlx4_msi_x_interrupt, 0, eq_name, - priv->eq_table.eq + i); - if (err) - goto err_out_async; + err = request_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq, + mlx4_msi_x_interrupt, 0, eq_name, + priv->eq_table.eq + MLX4_EQ_ASYNC); + if (err) + goto err_out_unmap; - priv->eq_table.eq[i].have_irq = 1; - } + priv->eq_table.eq[MLX4_EQ_ASYNC].have_irq = 1; } else { snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, @@ -1253,36 +1271,38 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) - goto err_out_async; + goto err_out_unmap; priv->eq_table.have_irq = 1; } err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) - eq_set_ci(&priv->eq_table.eq[i], 1); + /* arm ASYNC eq */ + eq_set_ci(&priv->eq_table.eq[MLX4_EQ_ASYNC], 1); return 0; -err_out_async: - mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]); - -err_out_comp: - i = dev->caps.num_comp_vectors - 1; - err_out_unmap: - while (i >= 0) { - mlx4_free_eq(dev, &priv->eq_table.eq[i]); - --i; + while (i >= 0) + mlx4_free_eq(dev, &priv->eq_table.eq[i--]); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } } +#endif + mlx4_free_irqs(dev); + +err_out_clr_int: if (!mlx4_is_slave(dev)) mlx4_unmap_clr_int(dev); - mlx4_free_irqs(dev); err_out_bitmap: mlx4_unmap_uar(dev); @@ -1300,11 +1320,19 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) int i; mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } + } +#endif mlx4_free_irqs(dev); - for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); if (!mlx4_is_slave(dev)) @@ -1336,6 +1364,10 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) * and performing a NOP command */ for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { + /* Make sure request_irq was called */ + if (!priv->eq_table.eq[i].have_irq) + continue; + /* Temporary use polling for command completions */ mlx4_cmd_use_polling(dev); @@ -1355,87 +1387,169 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) /* Return to default */ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); return err; } EXPORT_SYMBOL(mlx4_test_interrupts); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector) +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector) { + struct mlx4_priv *priv = mlx4_priv(dev); + + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector < 0 || (vector >= dev->caps.num_comp_vectors + 1) || + (vector == MLX4_EQ_ASYNC)) + return false; + + return test_bit(port - 1, priv->eq_table.eq[vector].actv_ports.ports); +} +EXPORT_SYMBOL(mlx4_is_eq_vector_valid); +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port) +{ struct mlx4_priv *priv = mlx4_priv(dev); - int vec = 0, err = 0, i; + unsigned int i; + unsigned int sum = 0; + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) + sum += !!test_bit(port - 1, + priv->eq_table.eq[i].actv_ports.ports); + + return sum; +} +EXPORT_SYMBOL(mlx4_get_eqs_per_port); + +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector <= 0 || (vector >= dev->caps.num_comp_vectors + 1)) + return -EINVAL; + + return !!(bitmap_weight(priv->eq_table.eq[vector].actv_ports.ports, + dev->caps.num_ports) > 1); +} +EXPORT_SYMBOL(mlx4_is_eq_shared); + +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port) +{ + return mlx4_priv(dev)->port[port].rmap; +} +EXPORT_SYMBOL(mlx4_get_cpu_rmap); + +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err = 0, i = 0; + u32 min_ref_count_val = (u32)-1; + int requested_vector = MLX4_CQ_TO_EQ_VECTOR(*vector); + int *prequested_vector = NULL; + mutex_lock(&priv->msix_ctl.pool_lock); - for (i = 0; !vec && i < dev->caps.comp_pool; i++) { - if (~priv->msix_ctl.pool_bm & 1ULL << i) { - priv->msix_ctl.pool_bm |= 1ULL << i; - vec = dev->caps.num_comp_vectors + 1 + i; - snprintf(priv->eq_table.irq_names + - vec * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, "%s", name); -#ifdef CONFIG_RFS_ACCEL - if (rmap) { - err = irq_cpu_rmap_add(rmap, - priv->eq_table.eq[vec].irq); - if (err) - mlx4_warn(dev, "Failed adding irq rmap\n"); + if (requested_vector < (dev->caps.num_comp_vectors + 1) && + (requested_vector >= 0) && + (requested_vector != MLX4_EQ_ASYNC)) { + if (test_bit(port - 1, + priv->eq_table.eq[requested_vector].actv_ports.ports)) { + prequested_vector = &requested_vector; + } else { + struct mlx4_eq *eq; + + for (i = 1; i < port; + requested_vector += mlx4_get_eqs_per_port(dev, i++)) + ; + + eq = &priv->eq_table.eq[requested_vector]; + if (requested_vector < dev->caps.num_comp_vectors + 1 && + test_bit(port - 1, eq->actv_ports.ports)) { + prequested_vector = &requested_vector; } -#endif - err = request_irq(priv->eq_table.eq[vec].irq, - mlx4_msi_x_interrupt, 0, - &priv->eq_table.irq_names[vec<<5], - priv->eq_table.eq + vec); - if (err) { - /*zero out bit by fliping it*/ - priv->msix_ctl.pool_bm ^= 1 << i; - vec = 0; - continue; - /*we dont want to break here*/ + } + } + + if (!prequested_vector) { + requested_vector = -1; + for (i = 0; min_ref_count_val && i < dev->caps.num_comp_vectors + 1; + i++) { + struct mlx4_eq *eq = &priv->eq_table.eq[i]; + + if (min_ref_count_val > eq->ref_count && + test_bit(port - 1, eq->actv_ports.ports)) { + min_ref_count_val = eq->ref_count; + requested_vector = i; } + } + + if (requested_vector < 0) { + err = -ENOSPC; + goto err_unlock; + } + + prequested_vector = &requested_vector; + } + + if (!test_bit(*prequested_vector, priv->msix_ctl.pool_bm) && + dev->flags & MLX4_FLAG_MSI_X) { + set_bit(*prequested_vector, priv->msix_ctl.pool_bm); + snprintf(priv->eq_table.irq_names + + *prequested_vector * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, "mlx4-%d@%s", + *prequested_vector, dev_name(&dev->persist->pdev->dev)); - eq_set_ci(&priv->eq_table.eq[vec], 1); + err = request_irq(priv->eq_table.eq[*prequested_vector].irq, + mlx4_msi_x_interrupt, 0, + &priv->eq_table.irq_names[*prequested_vector << 5], + priv->eq_table.eq + *prequested_vector); + + if (err) { + clear_bit(*prequested_vector, priv->msix_ctl.pool_bm); + *prequested_vector = -1; + } else { +#if defined(CONFIG_SMP) + mlx4_set_eq_affinity_hint(priv, *prequested_vector); +#endif + eq_set_ci(&priv->eq_table.eq[*prequested_vector], 1); + priv->eq_table.eq[*prequested_vector].have_irq = 1; } } + + if (!err && *prequested_vector >= 0) + priv->eq_table.eq[*prequested_vector].ref_count++; + +err_unlock: mutex_unlock(&priv->msix_ctl.pool_lock); - if (vec) { - *vector = vec; - } else { + if (!err && *prequested_vector >= 0) + *vector = MLX4_EQ_TO_CQ_VECTOR(*prequested_vector); + else *vector = 0; - err = (i == dev->caps.comp_pool) ? -ENOSPC : err; - } + return err; } EXPORT_SYMBOL(mlx4_assign_eq); -int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec) +int mlx4_eq_get_irq(struct mlx4_dev *dev, int cq_vec) { struct mlx4_priv *priv = mlx4_priv(dev); - return priv->eq_table.eq[vec].irq; + return priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq_vec)].irq; } EXPORT_SYMBOL(mlx4_eq_get_irq); void mlx4_release_eq(struct mlx4_dev *dev, int vec) { struct mlx4_priv *priv = mlx4_priv(dev); - /*bm index*/ - int i = vec - dev->caps.num_comp_vectors - 1; - - if (likely(i >= 0)) { - /*sanity check , making sure were not trying to free irq's - Belonging to a legacy EQ*/ - mutex_lock(&priv->msix_ctl.pool_lock); - if (priv->msix_ctl.pool_bm & 1ULL << i) { - free_irq(priv->eq_table.eq[vec].irq, - &priv->eq_table.eq[vec]); - priv->msix_ctl.pool_bm &= ~(1ULL << i); - } - mutex_unlock(&priv->msix_ctl.pool_lock); - } + int eq_vec = MLX4_CQ_TO_EQ_VECTOR(vec); + mutex_lock(&priv->msix_ctl.pool_lock); + priv->eq_table.eq[eq_vec].ref_count--; + + /* once we allocated EQ, we don't release it because it might be binded + * to cpu_rmap. + */ + mutex_unlock(&priv->msix_ctl.pool_lock); } EXPORT_SYMBOL(mlx4_release_eq); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/fw.c b/kernel/drivers/net/ethernet/mellanox/mlx4/fw.c index e30bf57ad..90db94e83 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -154,6 +154,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [26] = "Port ETS Scheduler support", [27] = "Port beacon support", [28] = "RX-ALL support", + [29] = "802.1ad offload support", + [31] = "Modifying loopback source checks using UPDATE_QP support", + [32] = "Loopback source checks support", }; int i; @@ -307,6 +310,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 #define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) +#define QUERY_FUNC_CAP_PHV_BIT 0x40 if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = @@ -351,6 +355,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); + if (dev->caps.phv_bit[port]) { + field = QUERY_FUNC_CAP_PHV_BIT; + MLX4_PUT(outbox->buf, field, + QUERY_FUNC_CAP_FLAGS0_OFFSET); + } + } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); @@ -600,6 +610,9 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); + func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index @@ -700,6 +713,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_CONFIG_DEV_OFFSET 0x94 +#define QUERY_DEV_CAP_PHV_EN_OFFSET 0x96 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c @@ -898,6 +912,12 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; if (field & (1 << 2)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + MLX4_GET(field, outbox, QUERY_DEV_CAP_PHV_EN_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PHV_EN; + if (field & 0x40) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN; + MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); @@ -946,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; + if (field32 & (1 << 18)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB; + if (field32 & (1 << 19)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) @@ -1992,6 +2016,10 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + /* phv_check enable */ + MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); + if (byte_field & 0x2) + param->phv_check_en = 1; out: mlx4_free_cmd_mailbox(dev, mailbox); @@ -2758,3 +2786,79 @@ int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } + +static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit) +{ +#define SET_PORT_GEN_PHV_VALID 0x10 +#define SET_PORT_GEN_PHV_EN 0x80 + + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + + context->v_ignore_fcs |= SET_PORT_GEN_PHV_VALID; + if (phv_bit) + context->phv_en |= SET_PORT_GEN_PHV_EN; + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) +{ + int err; + struct mlx4_func_cap func_cap; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + return err; +} +EXPORT_SYMBOL(get_phv_bit); + +int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) +{ + int ret; + + if (mlx4_is_slave(dev)) + return -EPERM; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { + ret = mlx4_SET_PORT_phv_bit(dev, port, new_val); + if (!ret) + dev->caps.phv_bit[port] = new_val; + return ret; + } + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(set_phv_bit); + +void mlx4_replace_zero_macs(struct mlx4_dev *dev) +{ + int i; + u8 mac_addr[ETH_ALEN]; + + dev->port_random_macs = 0; + for (i = 1; i <= dev->caps.num_ports; ++i) + if (!dev->caps.def_mac[i] && + dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { + eth_random_addr(mac_addr); + dev->port_random_macs |= 1 << i; + dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr); + } +} +EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/fw.h b/kernel/drivers/net/ethernet/mellanox/mlx4/fw.h index 07cb7c246..08de5555c 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -204,6 +204,7 @@ struct mlx4_init_hca_param { u16 cqe_size; /* For use only when CQE stride feature enabled */ u16 eqe_size; /* For use only when EQE stride feature enabled */ u8 rss_ip_frags; + u8 phv_check_en; /* for QUERY_HCA */ }; struct mlx4_init_ib_param { diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/intf.c b/kernel/drivers/net/ethernet/mellanox/mlx4/intf.c index 0d80aed59..0472941af 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -63,8 +63,11 @@ static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); spin_unlock_irq(&priv->ctx_lock); + if (intf->activate) + intf->activate(&priv->dev, dev_ctx->context); } else kfree(dev_ctx); + } static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv) diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/main.c b/kernel/drivers/net/ethernet/mellanox/mlx4/main.c index ced5ecab5..31c491e02 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/main.c @@ -405,6 +405,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { + struct mlx4_init_hca_param hca_param; + + memset(&hca_param, 0, sizeof(hca_param)); + err = mlx4_QUERY_HCA(dev, &hca_param); + /* Turn off PHV_EN flag in case phv_check_en is set. + * phv_check_en is a HW check that parse the packet and verify + * phv bit was reported correctly in the wqe. To allow QinQ + * PHV_EN flag should be set and phv_check_en must be cleared + * otherwise QinQ packets will be drop by the HW. + */ + if (err || hca_param.phv_check_en) + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN; + } + /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; @@ -479,7 +494,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } - dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters); + if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) && + (port_type_array[0] == MLX4_PORT_TYPE_IB) && + (port_type_array[1] == MLX4_PORT_TYPE_ETH)) { + mlx4_warn(dev, + "Granular QoS per VF not supported with IB/Eth configuration\n"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP; + } + + dev->caps.max_counters = dev_cap->max_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = @@ -840,6 +863,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + mlx4_replace_zero_macs(dev); + dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); @@ -867,9 +892,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; dev->caps.phys_port_id[i] = func_cap.phys_port_id; - if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, - &dev->caps.gid_table_len[i], - &dev->caps.pkey_table_len[i])) + err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &dev->caps.gid_table_len[i], + &dev->caps.pkey_table_len[i]); + if (err) goto err_mem; } @@ -881,6 +907,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->persist->pdev, 2)); + err = -ENOMEM; goto err_mem; } @@ -1674,6 +1701,25 @@ static int map_internal_clock(struct mlx4_dev *dev) return 0; } +int mlx4_get_internal_clock_params(struct mlx4_dev *dev, + struct mlx4_clock_params *params) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (mlx4_is_slave(dev)) + return -ENOTSUPP; + + if (!params) + return -EINVAL; + + params->bar = priv->fw.clock_bar; + params->offset = priv->fw.clock_offset; + params->size = MLX4_CLOCK_SIZE; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); + static void unmap_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2193,20 +2239,78 @@ err_free_icm: static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int nent; + int nent_pow2; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; - nent = dev->caps.max_counters; - return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0); + if (!dev->caps.max_counters) + return -ENOSPC; + + nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); + /* reserve last counter index for sink counter */ + return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, + nent_pow2 - 1, 0, + nent_pow2 - dev->caps.max_counters + 1); } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) { + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) + return; + + if (!dev->caps.max_counters) + return; + mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); } +static void mlx4_cleanup_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + for (port = 0; port < dev->caps.num_ports; port++) + if (priv->def_counter[port] != -1) + mlx4_counter_free(dev, priv->def_counter[port]); +} + +static int mlx4_allocate_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port, err = 0; + u32 idx; + + for (port = 0; port < dev->caps.num_ports; port++) + priv->def_counter[port] = -1; + + for (port = 0; port < dev->caps.num_ports; port++) { + err = mlx4_counter_alloc(dev, &idx); + + if (!err || err == -ENOSPC) { + priv->def_counter[port] = idx; + } else if (err == -ENOENT) { + err = 0; + continue; + } else if (mlx4_is_slave(dev) && err == -EINVAL) { + priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev); + mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n", + MLX4_SINK_COUNTER_INDEX(dev)); + err = 0; + } else { + mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", + __func__, port + 1, err); + mlx4_cleanup_default_counters(dev); + return err; + } + + mlx4_dbg(dev, "%s: default counter index %d for port %d\n", + __func__, priv->def_counter[port], port + 1); + } + + return err; +} + int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2215,8 +2319,10 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return -ENOENT; *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); - if (*idx == -1) - return -ENOMEM; + if (*idx == -1) { + *idx = MLX4_SINK_COUNTER_INDEX(dev); + return -ENOSPC; + } return 0; } @@ -2239,8 +2345,35 @@ int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) } EXPORT_SYMBOL_GPL(mlx4_counter_alloc); +static int __mlx4_clear_if_stat(struct mlx4_dev *dev, + u8 counter_index) +{ + struct mlx4_cmd_mailbox *if_stat_mailbox; + int err; + u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET; + + if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(if_stat_mailbox)) + return PTR_ERR(if_stat_mailbox); + + err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, if_stat_mailbox); + return err; +} + void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) { + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) + return; + + if (idx == MLX4_SINK_COUNTER_INDEX(dev)) + return; + + __mlx4_clear_if_stat(dev, idx); + mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); return; } @@ -2260,6 +2393,14 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) } EXPORT_SYMBOL_GPL(mlx4_counter_free); +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->def_counter[port - 1]; +} +EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index); + void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2364,11 +2505,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].irq); + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); mlx4_warn(dev, "Trying again without MSI-X\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].irq); + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } @@ -2395,10 +2536,18 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_srq_table_free; } - err = mlx4_init_counters_table(dev); - if (err && err != -ENOENT) { - mlx4_err(dev, "Failed to initialize counters table, aborting\n"); - goto err_qp_table_free; + if (!mlx4_is_slave(dev)) { + err = mlx4_init_counters_table(dev); + if (err && err != -ENOENT) { + mlx4_err(dev, "Failed to initialize counters table, aborting\n"); + goto err_qp_table_free; + } + } + + err = mlx4_allocate_default_counters(dev); + if (err) { + mlx4_err(dev, "Failed to allocate default counters, aborting\n"); + goto err_counters_table_free; } if (!mlx4_is_slave(dev)) { @@ -2432,15 +2581,19 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", port); - goto err_counters_table_free; + goto err_default_countes_free; } } } return 0; +err_default_countes_free: + mlx4_cleanup_default_counters(dev); + err_counters_table_free: - mlx4_cleanup_counters_table(dev); + if (!mlx4_is_slave(dev)) + mlx4_cleanup_counters_table(dev); err_qp_table_free: mlx4_cleanup_qp_table(dev); @@ -2481,17 +2634,50 @@ err_uar_table_free: return err; } +static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn) +{ + int requested_cpu = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_eq *eq; + int off = 0; + int i; + + if (eqn > dev->caps.num_comp_vectors) + return -EINVAL; + + for (i = 1; i < port; i++) + off += mlx4_get_eqs_per_port(dev, i); + + requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC); + + /* Meaning EQs are shared, and this call comes from the second port */ + if (requested_cpu < 0) + return 0; + + eq = &priv->eq_table.eq[eqn]; + + if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_set_cpu(requested_cpu, eq->affinity_mask); + + return 0; +} + static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int i; + int port = 0; if (msi_x) { - int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; + int nreq = dev->caps.num_ports * num_online_cpus() + 1; nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); + if (nreq > MAX_MSIX) + nreq = MAX_MSIX; entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) @@ -2503,20 +2689,55 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, nreq); - if (nreq < 0) { + if (nreq < 0 || nreq < MLX4_EQ_ASYNC) { kfree(entries); goto no_msi; - } else if (nreq < MSIX_LEGACY_SZ + - dev->caps.num_ports * MIN_MSIX_P_PORT) { - /*Working in legacy mode , all EQ's shared*/ - dev->caps.comp_pool = 0; - dev->caps.num_comp_vectors = nreq - 1; - } else { - dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; - dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } - for (i = 0; i < nreq; ++i) - priv->eq_table.eq[i].irq = entries[i].vector; + /* 1 is reserved for events (asyncrounous EQ) */ + dev->caps.num_comp_vectors = nreq - 1; + + priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector; + bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports, + dev->caps.num_ports); + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) { + if (i == MLX4_EQ_ASYNC) + continue; + + priv->eq_table.eq[i].irq = + entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector; + + if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + /* We don't set affinity hint when there + * aren't enough EQs + */ + } else { + set_bit(port, + priv->eq_table.eq[i].actv_ports.ports); + if (mlx4_init_affinity_hint(dev, port + 1, i)) + mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n", + i); + } + /* We divide the Eqs evenly between the two ports. + * (dev->caps.num_comp_vectors / dev->caps.num_ports) + * refers to the number of Eqs per port + * (i.e eqs_per_port). Theoretically, we would like to + * write something like (i + 1) % eqs_per_port == 0. + * However, since there's an asynchronous Eq, we have + * to skip over it by comparing this condition to + * !!((i + 1) > MLX4_EQ_ASYNC). + */ + if ((dev->caps.num_comp_vectors > dev->caps.num_ports) && + ((i + 1) % + (dev->caps.num_comp_vectors / dev->caps.num_ports)) == + !!((i + 1) > MLX4_EQ_ASYNC)) + /* If dev->caps.num_comp_vectors < dev->caps.num_ports, + * everything is shared anyway. + */ + port++; + } dev->flags |= MLX4_FLAG_MSI_X; @@ -2526,10 +2747,15 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) no_msi: dev->caps.num_comp_vectors = 1; - dev->caps.comp_pool = 0; - for (i = 0; i < 2; ++i) + BUG_ON(MLX4_EQ_ASYNC >= 2); + for (i = 0; i < 2; ++i) { priv->eq_table.eq[i].irq = dev->persist->pdev->irq; + if (i != MLX4_EQ_ASYNC) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + } + } } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2594,6 +2820,10 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(info->rmap); + info->rmap = NULL; +#endif } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -2703,6 +2933,8 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, { u64 dev_flags = dev->flags; int err = 0; + int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev), + MLX4_MAX_NUM_VF); if (reset_flow) { dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), @@ -2728,6 +2960,12 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, } if (!(dev->flags & MLX4_FLAG_SRIOV)) { + if (total_vfs > fw_enabled_sriov_vfs) { + mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n", + total_vfs, fw_enabled_sriov_vfs); + err = -ENOMEM; + goto disable_sriov; + } mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs); err = pci_enable_sriov(pdev, total_vfs); } @@ -2749,6 +2987,7 @@ disable_sriov: free_mem: dev->persist->num_vfs = 0; kfree(dev->dev_vfs); + dev->dev_vfs = NULL; return dev_flags & ~MLX4_FLAG_MASTER; } @@ -2900,6 +3139,7 @@ slave_start: existing_vfs, reset_flow); + mlx4_close_fw(dev); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; if (!SRIOV_VALID_STATE(dev->flags)) { @@ -2988,18 +3228,6 @@ slave_start: /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { - int ib_ports = 0; - - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) - ib_ports++; - - if (ib_ports && - (num_vfs_argc > 1 || probe_vfs_argc > 1)) { - mlx4_err(dev, - "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n"); - err = -EINVAL; - goto err_close; - } if (dev->caps.num_ports < 2 && num_vfs_argc > 1) { err = -EINVAL; @@ -3036,7 +3264,7 @@ slave_start: if (err) goto err_master_mfunc; - priv->msix_ctl.pool_bm = 0; + bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX); mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); @@ -3058,7 +3286,6 @@ slave_start: !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; - dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } @@ -3109,7 +3336,9 @@ err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); - mlx4_cleanup_counters_table(dev); + mlx4_cleanup_default_counters(dev); + if (!mlx4_is_slave(dev)) + mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); @@ -3218,20 +3447,20 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, goto err_disable_pdev; } } - if (total_vfs >= MLX4_MAX_NUM_VF) { + if (total_vfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, - "Requested more VF's (%d) than allowed (%d)\n", - total_vfs, MLX4_MAX_NUM_VF - 1); + "Requested more VF's (%d) than allowed by hw (%d)\n", + total_vfs, MLX4_MAX_NUM_VF); err = -EINVAL; goto err_disable_pdev; } for (i = 0; i < MLX4_MAX_PORTS; i++) { - if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) { + if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) { dev_err(&pdev->dev, - "Requested more VF's (%d) for port (%d) than allowed (%d)\n", + "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n", nvfs[i] + nvfs[2], i + 1, - MLX4_MAX_NUM_VF_P_PORT - 1); + MLX4_MAX_NUM_VF_P_PORT); err = -EINVAL; goto err_disable_pdev; } @@ -3407,7 +3636,9 @@ static void mlx4_unload_one(struct pci_dev *pdev) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); - mlx4_cleanup_counters_table(dev); + mlx4_cleanup_default_counters(dev); + if (!mlx4_is_slave(dev)) + mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/mcg.c b/kernel/drivers/net/ethernet/mellanox/mlx4/mcg.c index bd9ea0d01..1d4e2e054 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1184,10 +1184,11 @@ out: if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) - new_steering_entry(dev, port, steer, index, qp->qpn); + err = new_steering_entry(dev, port, steer, + index, qp->qpn); else - existing_steering_entry(dev, port, steer, - index, qp->qpn); + err = existing_steering_entry(dev, port, steer, + index, qp->qpn); } if (err && link && index != -1) { if (index < dev->caps.num_mgms) diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 502d3dd2c..e1cf9036a 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -65,6 +65,8 @@ #define INIT_HCA_TPT_MW_ENABLE (1 << 7) +#define MLX4_QUERY_IF_STAT_RESET BIT(31) + enum { MLX4_HCR_BASE = 0x80680, MLX4_HCR_SIZE = 0x0001c, @@ -287,6 +289,12 @@ struct mlx4_icm_table { #define MLX4_CQE_SIZE_MASK_STRIDE 0x3 #define MLX4_EQE_SIZE_MASK_STRIDE 0x30 +#define MLX4_EQ_ASYNC 0 +#define MLX4_EQ_TO_CQ_VECTOR(vector) ((vector) - \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) +#define MLX4_CQ_TO_EQ_VECTOR(vector) ((vector) + \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ @@ -391,6 +399,9 @@ struct mlx4_eq { struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; struct mlx4_eq_tasklet tasklet_ctx; + struct mlx4_active_ports actv_ports; + u32 ref_count; + cpumask_var_t affinity_mask; }; struct mlx4_slave_eqe { @@ -776,6 +787,9 @@ struct mlx4_set_port_general_context { u8 pprx; u8 pfcrx; u16 reserved4; + u32 reserved5; + u8 phv_en; + u8 reserved6[3]; }; struct mlx4_set_port_rqp_calc_context { @@ -808,6 +822,7 @@ struct mlx4_port_info { struct mlx4_vlan_table vlan_table; struct mlx4_roce_gid_table gid_table; int base_qpn; + struct cpu_rmap *rmap; }; struct mlx4_sense { @@ -818,7 +833,7 @@ struct mlx4_sense { }; struct mlx4_msix_ctl { - u64 pool_bm; + DECLARE_BITMAP(pool_bm, MAX_MSIX); struct mutex pool_lock; }; @@ -864,6 +879,7 @@ struct mlx4_priv { struct mlx4_qp_table qp_table; struct mlx4_mcg_table mcg_table; struct mlx4_bitmap counters_bitmap; + int def_counter[MLX4_MAX_PORTS]; struct mlx4_catas_err catas_err; @@ -997,6 +1013,8 @@ int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data); int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); @@ -1360,6 +1378,8 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); +/* for VFs, replace zero MACs with randomly-generated MACs at driver start */ +void mlx4_replace_zero_macs(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 909fcf803..c41f15102 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -95,6 +95,7 @@ */ #define MLX4_EN_PRIV_FLAGS_BLUEFLAME 1 +#define MLX4_EN_PRIV_FLAGS_PHV 2 #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) @@ -339,7 +340,7 @@ struct mlx4_en_cq { struct napi_struct napi; int size; int buf_size; - unsigned vector; + int vector; enum cq_type is_tx; u16 moder_time; u16 moder_cnt; @@ -567,6 +568,7 @@ struct mlx4_en_priv { #endif struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; + struct mlx4_en_counter_stats pf_stats; struct mlx4_en_flow_stats_rx rx_priority_flowstats[MLX4_NUM_PRIORITIES]; struct mlx4_en_flow_stats_tx tx_priority_flowstats[MLX4_NUM_PRIORITIES]; struct mlx4_en_flow_stats_rx rx_flowstats; @@ -582,6 +584,7 @@ struct mlx4_en_priv { struct device *ddev; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; + u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; @@ -795,7 +798,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); - +int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, + int loopback); void mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 00555832a..7fd466c0b 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -23,6 +23,14 @@ struct mlx4_en_pkt_stats { #define NUM_PKT_STATS 43 }; +struct mlx4_en_counter_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +#define NUM_PF_STATS 4 +}; + struct mlx4_en_port_stats { unsigned long tso_packets; unsigned long xmit_more; @@ -99,7 +107,7 @@ enum { }; #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ - NUM_FLOW_STATS + NUM_PERF_STATS) + NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS) #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ sizeof(((struct net_device_stats *)0)->n)) diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/mr.c b/kernel/drivers/net/ethernet/mellanox/mlx4/mr.c index 78f51e103..93195191f 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -318,7 +318,7 @@ int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, key, NULL); } else { mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR_OR_NULL(mailbox)) + if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, key, diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/profile.c b/kernel/drivers/net/ethernet/mellanox/mlx4/profile.c index 2bf437aaf..bae8b22ed 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -82,7 +82,6 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, u64 total_size = 0; struct mlx4_resource *profile; - struct mlx4_resource tmp; struct sysinfo si; int i, j; @@ -149,11 +148,8 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, */ for (i = MLX4_RES_NUM; i > 0; --i) for (j = 1; j < i; ++j) { - if (profile[j].size > profile[j - 1].size) { - tmp = profile[j]; - profile[j] = profile[j - 1]; - profile[j - 1] = tmp; - } + if (profile[j].size > profile[j - 1].size) + swap(profile[j], profile[j - 1]); } for (i = 0; i < MLX4_RES_NUM; ++i) { diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/qp.c b/kernel/drivers/net/ethernet/mellanox/mlx4/qp.c index b75214a80..168823dde 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -422,20 +422,37 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, u64 qp_mask = 0; int err = 0; + if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS)) + return -EINVAL; + mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); cmd = (struct mlx4_update_qp_context *)mailbox->buf; - if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS)) - return -EINVAL; - if (attr & MLX4_UPDATE_QP_SMAC) { pri_addr_path_mask |= 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX; cmd->qp_context.pri_path.grh_mylmc = params->smac_index; } + if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) { + if (!(dev->caps.flags2 + & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Trying to set src check LB, but it isn't supported\n"); + err = -ENOTSUPP; + goto out; + } + pri_addr_path_mask |= + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB; + if (params->flags & + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) { + cmd->qp_context.pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + } + } + if (attr & MLX4_UPDATE_QP_VSD) { qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD; if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE) @@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0, MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - +out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } @@ -749,7 +766,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) { int sort[MLX4_NUM_QP_REGION]; - int i, j, tmp; + int i, j; int last_base = dev->caps.num_qps; for (i = 1; i < MLX4_NUM_QP_REGION; ++i) @@ -758,11 +775,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) for (i = MLX4_NUM_QP_REGION; i > MLX4_QP_REGION_BOTTOM; --i) { for (j = MLX4_QP_REGION_BOTTOM + 2; j < i; ++j) { if (dev->caps.reserved_qps_cnt[sort[j]] > - dev->caps.reserved_qps_cnt[sort[j - 1]]) { - tmp = sort[j]; - sort[j] = sort[j - 1]; - sort[j - 1] = tmp; - } + dev->caps.reserved_qps_cnt[sort[j - 1]]) + swap(sort[j], sort[j - 1]); } } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/kernel/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index bafe2180c..cad6c44df 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -46,8 +46,11 @@ #include "mlx4.h" #include "fw.h" +#include "mlx4_stats.h" #define MLX4_MAC_VALID (1ull << 63) +#define MLX4_PF_COUNTERS_PER_PORT 2 +#define MLX4_VF_COUNTERS_PER_PORT 1 struct mac_res { struct list_head list; @@ -459,11 +462,21 @@ void mlx4_init_quotas(struct mlx4_dev *dev) dev->quotas.mpt = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } + +static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev) +{ + /* reduce the sink counter */ + return (dev->caps.max_counters - 1 - + (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS)) + / MLX4_MAX_PORTS; +} + int mlx4_init_resource_tracker(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; + int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev); priv->mfunc.master.res_tracker.slave_list = kzalloc(dev->num_slaves * sizeof(struct slave_list), @@ -499,6 +512,9 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) res_alloc->allocated = kzalloc((dev->persist-> num_vfs + 1) * sizeof(int), GFP_KERNEL); + /* Reduce the sink counter */ + if (i == RES_COUNTER) + res_alloc->res_free = dev->caps.max_counters - 1; if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) @@ -577,9 +593,17 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; - res_alloc->guaranteed[t] = 0; if (t == mlx4_master_func_num(dev)) - res_alloc->res_free = res_alloc->quota[t]; + res_alloc->guaranteed[t] = + MLX4_PF_COUNTERS_PER_PORT * + MLX4_MAX_PORTS; + else if (t <= max_vfs_guarantee_counter) + res_alloc->guaranteed[t] = + MLX4_VF_COUNTERS_PER_PORT * + MLX4_MAX_PORTS; + else + res_alloc->guaranteed[t] = 0; + res_alloc->res_free -= res_alloc->guaranteed[t]; break; default: break; @@ -700,6 +724,9 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, } } +static int handle_counter(struct mlx4_dev *dev, struct mlx4_qp_context *qpc, + u8 slave, int port); + static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, u8 slave, u32 qpn) @@ -715,6 +742,10 @@ static int update_vport_qp_param(struct mlx4_dev *dev, vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + err = handle_counter(dev, qpc, slave, port); + if (err) + goto out; + if (MLX4_VGT != vp_oper->state.default_vlan) { /* the reserved QPs (special, proxy, tunnel) * do not operate over vlans @@ -739,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev, } } + /* preserve IF_COUNTER flag */ + qpc->pri_path.vlan_control &= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE && dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED | @@ -749,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; } else { /* priority tagged */ - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } @@ -859,6 +893,83 @@ static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, spin_unlock_irq(mlx4_tlock(dev)); } +static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, + u64 in_param, u64 *out_param, int port); + +static int handle_existing_counter(struct mlx4_dev *dev, u8 slave, int port, + int counter_index) +{ + struct res_common *r; + struct res_counter *counter; + int ret = 0; + + if (counter_index == MLX4_SINK_COUNTER_INDEX(dev)) + return ret; + + spin_lock_irq(mlx4_tlock(dev)); + r = find_res(dev, counter_index, RES_COUNTER); + if (!r || r->owner != slave) + ret = -EINVAL; + counter = container_of(r, struct res_counter, com); + if (!counter->port) + counter->port = port; + + spin_unlock_irq(mlx4_tlock(dev)); + return ret; +} + +static int handle_unexisting_counter(struct mlx4_dev *dev, + struct mlx4_qp_context *qpc, u8 slave, + int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *tmp; + struct res_counter *counter; + u64 counter_idx = MLX4_SINK_COUNTER_INDEX(dev); + int err = 0; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(tmp, + &tracker->slave_list[slave].res_list[RES_COUNTER], + list) { + counter = container_of(tmp, struct res_counter, com); + if (port == counter->port) { + qpc->pri_path.counter_index = counter->com.res_id; + spin_unlock_irq(mlx4_tlock(dev)); + return 0; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + /* No existing counter, need to allocate a new counter */ + err = counter_alloc_res(dev, slave, RES_OP_RESERVE, 0, 0, &counter_idx, + port); + if (err == -ENOENT) { + err = 0; + } else if (err && err != -ENOSPC) { + mlx4_err(dev, "%s: failed to create new counter for slave %d err %d\n", + __func__, slave, err); + } else { + qpc->pri_path.counter_index = counter_idx; + mlx4_dbg(dev, "%s: alloc new counter for slave %d index %d\n", + __func__, slave, qpc->pri_path.counter_index); + err = 0; + } + + return err; +} + +static int handle_counter(struct mlx4_dev *dev, struct mlx4_qp_context *qpc, + u8 slave, int port) +{ + if (qpc->pri_path.counter_index != MLX4_SINK_COUNTER_INDEX(dev)) + return handle_existing_counter(dev, slave, port, + qpc->pri_path.counter_index); + + return handle_unexisting_counter(dev, qpc, slave, port); +} + static struct res_common *alloc_qp_tr(int id) { struct res_qp *ret; @@ -952,7 +1063,7 @@ static struct res_common *alloc_srq_tr(int id) return &ret->com; } -static struct res_common *alloc_counter_tr(int id) +static struct res_common *alloc_counter_tr(int id, int port) { struct res_counter *ret; @@ -962,6 +1073,7 @@ static struct res_common *alloc_counter_tr(int id) ret->com.res_id = id; ret->com.state = RES_COUNTER_ALLOCATED; + ret->port = port; return &ret->com; } @@ -1022,7 +1134,7 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, pr_err("implementation missing\n"); return NULL; case RES_COUNTER: - ret = alloc_counter_tr(id); + ret = alloc_counter_tr(id, extra); break; case RES_XRCD: ret = alloc_xrcdn_tr(id); @@ -1039,6 +1151,53 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, return ret; } +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *tmp; + struct res_counter *counter; + int *counters_arr; + int i = 0, err = 0; + + memset(data, 0, sizeof(*data)); + + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return -ENOMEM; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(tmp, + &tracker->slave_list[slave].res_list[RES_COUNTER], + list) { + counter = container_of(tmp, struct res_counter, com); + if (counter->port == port) { + counters_arr[i] = (int)tmp->res_id; + i++; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + counters_arr[i] = -1; + + i = 0; + + while (counters_arr[i] != -1) { + err = mlx4_get_counter_stats(dev, counters_arr[i], data, + 0); + if (err) { + memset(data, 0, sizeof(*data)); + goto table_changed; + } + i++; + } + +table_changed: + kfree(counters_arr); + return 0; +} + static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { @@ -1082,8 +1241,10 @@ static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, return 0; undo: - for (--i; i >= base; --i) + for (--i; i >= 0; --i) { rb_erase(&res_arr[i]->node, root); + list_del_init(&res_arr[i]->list); + } spin_unlock_irq(mlx4_tlock(dev)); @@ -2001,7 +2162,7 @@ static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { u32 index; int err; @@ -2019,7 +2180,7 @@ static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; } - err = add_res_range(dev, slave, index, 1, RES_COUNTER, 0); + err = add_res_range(dev, slave, index, 1, RES_COUNTER, port); if (err) { __mlx4_counter_free(dev, index); mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); @@ -2101,7 +2262,7 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_COUNTER: err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, 0); break; case RES_XRCD: @@ -2335,6 +2496,9 @@ static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, return -EINVAL; index = get_param_l(&in_param); + if (index == MLX4_SINK_COUNTER_INDEX(dev)) + return 0; + err = rem_res_range(dev, slave, index, 1, RES_COUNTER, 0); if (err) return err; @@ -2703,6 +2867,10 @@ static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, context->qkey = cpu_to_be32(qkey); } +static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox); + int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2725,6 +2893,10 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct res_srq *srq; int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; + err = adjust_qp_sched_queue(dev, slave, qpc, inbox); + if (err) + return err; + err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0); if (err) return err; @@ -3526,8 +3698,8 @@ static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave, pri_sched_queue = (qpc->pri_path.sched_queue & ~(1 << 6)) | ((port & 1) << 6); - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH || - mlx4_is_eth(dev, port + 1)) { + if (optpar & (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE) || + qpc->pri_path.sched_queue || mlx4_is_eth(dev, port + 1)) { qpc->pri_path.sched_queue = pri_sched_queue; } @@ -3595,9 +3767,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); orig_sched_queue = qpc->pri_path.sched_queue; - err = update_vport_qp_param(dev, inbox, slave, qpn); - if (err) - return err; err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) @@ -3607,6 +3776,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, goto out; } + err = update_vport_qp_param(dev, inbox, slave, qpn); + if (err) + goto out; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: /* if no error, save sched queue value passed in by VF. This is @@ -3965,6 +4138,22 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } +static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} + /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4025,7 +4214,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave, } -#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX) +#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \ + 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\ + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB) int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4048,6 +4239,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) return -EPERM; + if ((pri_addr_path_mask & + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && + !(dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Src check LB for slave %d isn't supported\n", + slave); + return -ENOTSUPP; + } + /* Just change the smac for the QP */ err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { @@ -4105,9 +4306,10 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; - ctrl->port = mlx4_slave_convert_port(dev, slave, ctrl->port); - if (ctrl->port <= 0) + err = mlx4_slave_convert_port(dev, slave, ctrl->port); + if (err <= 0) return -EINVAL; + ctrl->port = err; qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { @@ -4117,6 +4319,12 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); + if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) + handle_eth_header_mcast_prio(ctrl, rule_header); + + if (slave == dev->caps.function) + goto execute; + switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: if (validate_eth_header_mac(slave, rule_header, rlist)) { @@ -4143,6 +4351,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put; } +execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, @@ -4744,26 +4953,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) struct res_counter *counter; struct res_counter *tmp; int err; - int index; + int *counters_arr = NULL; + int i, j; err = move_all_busy(dev, slave, RES_COUNTER); if (err) mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n", slave); - spin_lock_irq(mlx4_tlock(dev)); - list_for_each_entry_safe(counter, tmp, counter_list, com.list) { - if (counter->com.owner == slave) { - index = counter->com.res_id; - rb_erase(&counter->com.node, - &tracker->res_tree[RES_COUNTER]); - list_del(&counter->com.list); - kfree(counter); - __mlx4_counter_free(dev, index); + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return; + + do { + i = 0; + j = 0; + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(counter, tmp, counter_list, com.list) { + if (counter->com.owner == slave) { + counters_arr[i++] = counter->com.res_id; + rb_erase(&counter->com.node, + &tracker->res_tree[RES_COUNTER]); + list_del(&counter->com.list); + kfree(counter); + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + while (j < i) { + __mlx4_counter_free(dev, counters_arr[j++]); mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); } - } - spin_unlock_irq(mlx4_tlock(dev)); + } while (i); + + kfree(counters_arr); } static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/kernel/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 8ff57e8e3..158c88c69 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -3,6 +3,18 @@ # config MLX5_CORE - tristate + tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" depends on PCI default n + ---help--- + Core driver for low level functionality of the ConnectX-4 and + Connect-IB cards by Mellanox Technologies. + +config MLX5_CORE_EN + bool "Mellanox Technologies ConnectX-4 Ethernet support" + depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + default n + ---help--- + Ethernet support in Mellanox Technologies ConnectX-4 NIC. + Ethernet and Infiniband support in ConnectX-4 are currently mutually + exclusive. diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/kernel/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 105780bb9..26a68b8af 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,4 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o + mad.o transobj.o vport.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ + en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ + en_txrx.o diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index ac0f7bf4b..6cb383046 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -42,99 +42,65 @@ #include "mlx5_core.h" /* Handling for queue buffers -- we allocate a bunch of memory and - * register it in a memory region at HCA virtual address 0. If the - * requested size is > max_direct, we split the allocation into - * multiple pages, so we don't require too much contiguous memory. + * register it in a memory region at HCA virtual address 0. */ -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, - struct mlx5_buf *buf) +static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, + size_t size, dma_addr_t *dma_handle, + int node) +{ + struct mlx5_priv *priv = &dev->priv; + int original_node; + void *cpu_handle; + + mutex_lock(&priv->alloc_mutex); + original_node = dev_to_node(&dev->pdev->dev); + set_dev_node(&dev->pdev->dev, node); + cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size, + dma_handle, GFP_KERNEL); + set_dev_node(&dev->pdev->dev, original_node); + mutex_unlock(&priv->alloc_mutex); + return cpu_handle; +} + +int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_buf *buf, int node) { dma_addr_t t; buf->size = size; - if (size <= max_direct) { - buf->nbufs = 1; - buf->npages = 1; - buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } - } else { - int i; - - buf->direct.buf = NULL; - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; - buf->npages = buf->nbufs; - buf->page_shift = PAGE_SHIFT; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - if (!buf->page_list) - return -ENOMEM; - - for (i = 0; i < buf->nbufs; i++) { - buf->page_list[i].buf = - dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); - if (!buf->page_list[i].buf) - goto err_free; - - buf->page_list[i].map = t; - } - - if (BITS_PER_LONG == 64) { - struct page **pages; - pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL); - if (!pages) - goto err_free; - for (i = 0; i < buf->nbufs; i++) - pages[i] = virt_to_page(buf->page_list[i].buf); - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - goto err_free; - } + buf->npages = 1; + buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; + buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size, + &t, node); + if (!buf->direct.buf) + return -ENOMEM; + + buf->direct.map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; } return 0; +} -err_free: - mlx5_buf_free(dev, buf); - - return -ENOMEM; +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) +{ + return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node); } EXPORT_SYMBOL_GPL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) { - int i; - - if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, - buf->direct.map); - else { - if (BITS_PER_LONG == 64) - vunmap(buf->direct.buf); - - for (i = 0; i < buf->nbufs; i++) - if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - } + dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, + buf->direct.map); } EXPORT_SYMBOL_GPL(mlx5_buf_free); -static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device) +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, + int node) { struct mlx5_db_pgdir *pgdir; @@ -143,8 +109,9 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device) return NULL; bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE); - pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, GFP_KERNEL); + + pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, + &pgdir->db_dma, node); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -177,7 +144,7 @@ static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, return 0; } -int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node) { struct mlx5_db_pgdir *pgdir; int ret = 0; @@ -188,7 +155,7 @@ int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) if (!mlx5_alloc_db_from_pgdir(pgdir, db)) goto out; - pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev)); + pgdir = mlx5_alloc_db_pgdir(dev, node); if (!pgdir) { ret = -ENOMEM; goto out; @@ -204,6 +171,12 @@ out: return ret; } +EXPORT_SYMBOL_GPL(mlx5_db_alloc_node); + +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + return mlx5_db_alloc_node(dev, db, dev->priv.numa_node); +} EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) @@ -230,10 +203,7 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) int i; for (i = 0; i < buf->npages; i++) { - if (buf->nbufs == 1) - addr = buf->direct.map + (i << buf->page_shift); - else - addr = buf->page_list[i].map; + addr = buf->direct.map + (i << buf->page_shift); pas[i] = cpu_to_be64(addr); } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e3273faf4..037fc4cdf 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include <asm-generic/kmap_types.h> +#include <linux/highmem.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/pci.h> @@ -75,25 +75,6 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; -enum { - MLX5_CMD_STAT_OK = 0x0, - MLX5_CMD_STAT_INT_ERR = 0x1, - MLX5_CMD_STAT_BAD_OP_ERR = 0x2, - MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, - MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, - MLX5_CMD_STAT_BAD_RES_ERR = 0x5, - MLX5_CMD_STAT_RES_BUSY = 0x6, - MLX5_CMD_STAT_LIM_ERR = 0x8, - MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, - MLX5_CMD_STAT_IX_ERR = 0xa, - MLX5_CMD_STAT_NO_RES_ERR = 0xf, - MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, - MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, - MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, - MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, - MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, -}; - static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, @@ -273,6 +254,156 @@ static void dump_buf(void *buf, int size, int data_only, int offset) pr_debug("\n"); } +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, +}; + +static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, + u32 *synd, u8 *status) +{ + *synd = 0; + *status = 0; + + switch (op) { + case MLX5_CMD_OP_TEARDOWN_HCA: + case MLX5_CMD_OP_DISABLE_HCA: + case MLX5_CMD_OP_MANAGE_PAGES: + case MLX5_CMD_OP_DESTROY_MKEY: + case MLX5_CMD_OP_DESTROY_EQ: + case MLX5_CMD_OP_DESTROY_CQ: + case MLX5_CMD_OP_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_PSV: + case MLX5_CMD_OP_DESTROY_SRQ: + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + case MLX5_CMD_OP_DESTROY_DCT: + case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DEALLOC_PD: + case MLX5_CMD_OP_DEALLOC_UAR: + case MLX5_CMD_OP_DETTACH_FROM_MCG: + case MLX5_CMD_OP_DEALLOC_XRCD: + case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_TIR: + case MLX5_CMD_OP_DESTROY_SQ: + case MLX5_CMD_OP_DESTROY_RQ: + case MLX5_CMD_OP_DESTROY_RMP: + case MLX5_CMD_OP_DESTROY_TIS: + case MLX5_CMD_OP_DESTROY_RQT: + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + case MLX5_CMD_OP_DESTROY_FLOW_GROUP: + case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + return MLX5_CMD_STAT_OK; + + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_INIT_HCA: + case MLX5_CMD_OP_ENABLE_HCA: + case MLX5_CMD_OP_QUERY_PAGES: + case MLX5_CMD_OP_SET_HCA_CAP: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_SET_ISSI: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_CREATE_EQ: + case MLX5_CMD_OP_QUERY_EQ: + case MLX5_CMD_OP_GEN_EQE: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_SQD_RTS_QP: + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_CREATE_PSV: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_MODIFY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_SET_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: + case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_UAR: + case MLX5_CMD_OP_CONFIG_INT_MODERATION: + case MLX5_CMD_OP_ACCESS_REG: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_MAD_IFC: + case MLX5_CMD_OP_QUERY_MAD_DEMUX: + case MLX5_CMD_OP_SET_MAD_DEMUX: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_ALLOC_XRCD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_MODIFY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_MODIFY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + *status = MLX5_DRIVER_STATUS_ABORTED; + *synd = MLX5_DRIVER_SYND; + return -EIO; + default: + mlx5_core_err(dev, "Unknown FW command (%d)\n", op); + return -EINVAL; + } +} + const char *mlx5_command_str(int command) { switch (command) { @@ -390,8 +521,17 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ARM_RQ: return "ARM_RQ"; - case MLX5_CMD_OP_RESIZE_SRQ: - return "RESIZE_SRQ"; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return "CREATE_XRC_SRQ"; + + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + return "DESTROY_XRC_SRQ"; + + case MLX5_CMD_OP_QUERY_XRC_SRQ: + return "QUERY_XRC_SRQ"; + + case MLX5_CMD_OP_ARM_XRC_SRQ: + return "ARM_XRC_SRQ"; case MLX5_CMD_OP_ALLOC_PD: return "ALLOC_PD"; @@ -408,8 +548,8 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ATTACH_TO_MCG: return "ATTACH_TO_MCG"; - case MLX5_CMD_OP_DETACH_FROM_MCG: - return "DETACH_FROM_MCG"; + case MLX5_CMD_OP_DETTACH_FROM_MCG: + return "DETTACH_FROM_MCG"; case MLX5_CMD_OP_ALLOC_XRCD: return "ALLOC_XRCD"; @@ -483,6 +623,7 @@ static void cmd_work_handler(struct work_struct *work) struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); struct mlx5_cmd_layout *lay; struct semaphore *sem; + unsigned long flags; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -495,6 +636,9 @@ static void cmd_work_handler(struct work_struct *work) } } else { ent->idx = cmd->max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); + clear_bit(ent->idx, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); } ent->token = alloc_token(cmd); @@ -594,6 +738,16 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) return err; } +static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out) +{ + return &out->syndrome; +} + +static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) +{ + return &out->status; +} + /* Notes: * 1. Callback functions may not sleep * 2. page queue commands do not support asynchrous completion @@ -1091,7 +1245,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1102,7 +1256,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) s64 ds; struct mlx5_cmd_stats *stats; unsigned long flags; + unsigned long vector; + /* there can be at most 32 command queues */ + vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { struct semaphore *sem; @@ -1120,11 +1277,16 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) ent->ret = verify_signature(ent); else ent->ret = 0; - ent->status = ent->lay->status_own >> 1; + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->status = MLX5_DRIVER_STATUS_ABORTED; + else + ent->status = ent->lay->status_own >> 1; + mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } free_ent(cmd, ent->idx); + if (ent->callback) { ds = ent->ts2 - ent->ts1; if (ent->op < ARRAY_SIZE(cmd->stats)) { @@ -1146,6 +1308,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); + err = err ? err : ent->status; free_cmd(ent); callback(err, context); } else { @@ -1193,6 +1356,11 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, return msg; } +static u16 opcode_from_in(struct mlx5_inbox_hdr *in) +{ + return be16_to_cpu(in->opcode); +} + static int is_manage_pages(struct mlx5_inbox_hdr *in) { return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; @@ -1207,6 +1375,15 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, gfp_t gfp; int err; u8 status = 0; + u32 drv_synd; + + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status); + *get_synd_ptr(out) = cpu_to_be32(drv_synd); + *get_status_ptr(out) = status; + return err; + } pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; @@ -1373,6 +1550,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) int err; int i; + memset(cmd, 0, sizeof(*cmd)); cmd_if_rev = cmdif_rev(dev); if (cmd_if_rev != CMD_IF_REV) { dev_err(&dev->pdev->dev, diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/cq.c index eb0cf81f5..b51e42d6f 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -219,11 +219,30 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, } EXPORT_SYMBOL(mlx5_core_modify_cq); +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, + u16 cq_period, + u16 cq_max_count) +{ + struct mlx5_modify_cq_mbox_in in; + + memset(&in, 0, sizeof(in)); + + in.cqn = cpu_to_be32(cq->cqn); + in.ctx.cq_period = cpu_to_be16(cq_period); + in.ctx.cq_max_count = cpu_to_be16(cq_max_count); + in.field_select = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD | + MLX5_CQ_MODIFY_COUNT); + + return mlx5_core_modify_cq(dev, cq, &in, sizeof(in)); +} + int mlx5_init_cq_table(struct mlx5_core_dev *dev) { struct mlx5_cq_table *table = &dev->priv.cq_table; int err; + memset(table, 0, sizeof(*table)); spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); err = mlx5_cq_debugfs_init(dev); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en.h b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en.h new file mode 100644 index 000000000..22e72bf1a --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -0,0 +1,633 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/if_vlan.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/vport.h> +#include "wq.h" +#include "transobj.h" +#include "mlx5_core.h" + +#define MLX5E_MAX_NUM_TC 8 + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 + +#define MLX5E_LOG_INDIR_RQT_SIZE 0x7 +#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ +#define MLX5E_SQ_BF_BUDGET 16 + +static const char vport_strings[][ETH_GSTRING_LEN] = { + /* vport statistics */ + "rx_packets", + "rx_bytes", + "tx_packets", + "tx_bytes", + "rx_error_packets", + "rx_error_bytes", + "tx_error_packets", + "tx_error_bytes", + "rx_unicast_packets", + "rx_unicast_bytes", + "tx_unicast_packets", + "tx_unicast_bytes", + "rx_multicast_packets", + "rx_multicast_bytes", + "tx_multicast_packets", + "tx_multicast_bytes", + "rx_broadcast_packets", + "rx_broadcast_bytes", + "tx_broadcast_packets", + "tx_broadcast_bytes", + + /* SW counters */ + "tso_packets", + "tso_bytes", + "lro_packets", + "lro_bytes", + "rx_csum_good", + "rx_csum_none", + "rx_csum_sw", + "tx_csum_offload", + "tx_queue_stopped", + "tx_queue_wake", + "tx_queue_dropped", + "rx_wqe_err", +}; + +struct mlx5e_vport_stats { + /* HW counters */ + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 rx_error_packets; + u64 rx_error_bytes; + u64 tx_error_packets; + u64 tx_error_bytes; + u64 rx_unicast_packets; + u64 rx_unicast_bytes; + u64 tx_unicast_packets; + u64 tx_unicast_bytes; + u64 rx_multicast_packets; + u64 rx_multicast_bytes; + u64 tx_multicast_packets; + u64 tx_multicast_bytes; + u64 rx_broadcast_packets; + u64 rx_broadcast_bytes; + u64 tx_broadcast_packets; + u64 tx_broadcast_bytes; + + /* SW counters */ + u64 tso_packets; + u64 tso_bytes; + u64 lro_packets; + u64 lro_bytes; + u64 rx_csum_good; + u64 rx_csum_none; + u64 rx_csum_sw; + u64 tx_csum_offload; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 rx_wqe_err; + +#define NUM_VPORT_COUNTERS 32 +}; + +static const char pport_strings[][ETH_GSTRING_LEN] = { + /* IEEE802.3 counters */ + "frames_tx", + "frames_rx", + "check_seq_err", + "alignment_err", + "octets_tx", + "octets_received", + "multicast_xmitted", + "broadcast_xmitted", + "multicast_rx", + "broadcast_rx", + "in_range_len_errors", + "out_of_range_len", + "too_long_errors", + "symbol_err", + "mac_control_tx", + "mac_control_rx", + "unsupported_op_rx", + "pause_ctrl_rx", + "pause_ctrl_tx", + + /* RFC2863 counters */ + "in_octets", + "in_ucast_pkts", + "in_discards", + "in_errors", + "in_unknown_protos", + "out_octets", + "out_ucast_pkts", + "out_discards", + "out_errors", + "in_multicast_pkts", + "in_broadcast_pkts", + "out_multicast_pkts", + "out_broadcast_pkts", + + /* RFC2819 counters */ + "drop_events", + "octets", + "pkts", + "broadcast_pkts", + "multicast_pkts", + "crc_align_errors", + "undersize_pkts", + "oversize_pkts", + "fragments", + "jabbers", + "collisions", + "p64octets", + "p65to127octets", + "p128to255octets", + "p256to511octets", + "p512to1023octets", + "p1024to1518octets", + "p1519to2047octets", + "p2048to4095octets", + "p4096to8191octets", + "p8192to10239octets", +}; + +#define NUM_IEEE_802_3_COUNTERS 19 +#define NUM_RFC_2863_COUNTERS 13 +#define NUM_RFC_2819_COUNTERS 21 +#define NUM_PPORT_COUNTERS (NUM_IEEE_802_3_COUNTERS + \ + NUM_RFC_2863_COUNTERS + \ + NUM_RFC_2819_COUNTERS) + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS]; + __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS]; + __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; +}; + +static const char rq_stats_strings[][ETH_GSTRING_LEN] = { + "packets", + "csum_none", + "csum_sw", + "lro_packets", + "lro_bytes", + "wqe_err" +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 csum_none; + u64 csum_sw; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; +#define NUM_RQ_STATS 6 +}; + +static const char sq_stats_strings[][ETH_GSTRING_LEN] = { + "packets", + "tso_packets", + "tso_bytes", + "csum_offload_none", + "stopped", + "wake", + "dropped", + "nop" +}; + +struct mlx5e_sq_stats { + u64 packets; + u64 tso_packets; + u64 tso_bytes; + u64 csum_offload_none; + u64 stopped; + u64 wake; + u64 dropped; + u64 nop; +#define NUM_SQ_STATS 8 +}; + +struct mlx5e_stats { + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; +}; + +struct mlx5e_params { + u8 log_sq_size; + u8 log_rq_size; + u16 num_channels; + u8 default_vlan_prio; + u8 num_tc; + u16 rx_cq_moderation_usec; + u16 rx_cq_moderation_pkts; + u16 tx_cq_moderation_usec; + u16 tx_cq_moderation_pkts; + u16 min_rx_wqes; + bool lro_en; + u32 lro_wqe_sz; + u16 tx_max_inline; + u8 rss_hfunc; + u8 toeplitz_hash_key[40]; + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; +}; + +enum { + MLX5E_RQ_STATE_POST_WQES_ENABLE, +}; + +enum cq_flags { + MLX5E_CQ_HAS_CQES = 1, +}; + +struct mlx5e_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + unsigned long flags; + + /* data path - accessed per napi poll */ + struct napi_struct *napi; + struct mlx5_core_cq mcq; + struct mlx5e_channel *channel; + struct mlx5e_priv *priv; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_rq { + /* data path */ + struct mlx5_wq_ll wq; + u32 wqe_sz; + struct sk_buff **skb; + + struct device *pdev; + struct net_device *netdev; + struct mlx5e_rq_stats stats; + struct mlx5e_cq cq; + + unsigned long state; + int ix; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + u32 rqn; + struct mlx5e_channel *channel; + struct mlx5e_priv *priv; +} ____cacheline_aligned_in_smp; + +struct mlx5e_tx_skb_cb { + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +#define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb) + +enum mlx5e_dma_map_type { + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +enum { + MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, +}; + +struct mlx5e_sq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + u16 bf_offset; + u16 prev_cc; + u8 bf_budget; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + + /* pointers to per packet info: write@xmit, read@completion */ + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + void __iomem *uar_map; + void __iomem *uar_bf_map; + struct netdev_queue *txq; + u32 sqn; + u16 bf_buf_size; + u16 max_inline; + u16 edge; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_uar uar; + struct mlx5e_channel *channel; + int tc; +} ____cacheline_aligned_in_smp; + +static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) +{ + return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) || + (sq->cc == sq->pc)); +} + +enum channel_flags { + MLX5E_CHANNEL_NAPI_SCHED = 1, +}; + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u8 num_tc; + unsigned long flags; + + /* control */ + struct mlx5e_priv *priv; + int ix; + int cpu; +}; + +enum mlx5e_traffic_types { + MLX5E_TT_IPV4_TCP, + MLX5E_TT_IPV6_TCP, + MLX5E_TT_IPV4_UDP, + MLX5E_TT_IPV6_UDP, + MLX5E_TT_IPV4_IPSEC_AH, + MLX5E_TT_IPV6_IPSEC_AH, + MLX5E_TT_IPV4_IPSEC_ESP, + MLX5E_TT_IPV6_IPSEC_ESP, + MLX5E_TT_IPV4, + MLX5E_TT_IPV6, + MLX5E_TT_ANY, + MLX5E_NUM_TT, +}; + +enum mlx5e_rqt_ix { + MLX5E_INDIRECTION_RQT, + MLX5E_SINGLE_RQ_RQT, + MLX5E_NUM_RQT, +}; + +struct mlx5e_eth_addr_info { + u8 addr[ETH_ALEN + 2]; + u32 tt_vec; + u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ +}; + +#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) + +struct mlx5e_eth_addr_db { + struct hlist_head netdev_uc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct mlx5e_eth_addr_info broadcast; + struct mlx5e_eth_addr_info allmulti; + struct mlx5e_eth_addr_info promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, +}; + +struct mlx5e_vlan_db { + u32 active_vlans_ft_ix[VLAN_N_VID]; + u32 untagged_rule_ft_ix; + u32 any_vlan_rule_ft_ix; + bool filter_disabled; +}; + +struct mlx5e_flow_table { + void *vlan; + void *main; +}; + +struct mlx5e_priv { + /* priv data path fields - start */ + int default_vlan_prio; + struct mlx5e_sq **txq_to_sq_map; + int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + /* priv data path fields - end */ + + unsigned long state; + struct mutex state_lock; /* Protects Interface state */ + struct mlx5_uar cq_uar; + u32 pdn; + u32 tdn; + struct mlx5_core_mr mr; + struct mlx5e_rq drop_rq; + + struct mlx5e_channel **channel; + u32 tisn[MLX5E_MAX_NUM_TC]; + u32 rqtn[MLX5E_NUM_RQT]; + u32 tirn[MLX5E_NUM_TT]; + + struct mlx5e_flow_table ft; + struct mlx5e_eth_addr_db eth_addr; + struct mlx5e_vlan_db vlan; + + struct mlx5e_params params; + spinlock_t async_events_spinlock; /* sync hw events */ + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + struct delayed_work update_stats_work; + + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_stats stats; +}; + +#define MLX5E_NET_IP_ALIGN 2 + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_100BASE_T = 25, + MLX5E_10GBASE_T = 26, + MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + +void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback); +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); + +void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); +int mlx5e_napi_poll(struct napi_struct *napi, int budget); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); +bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); + +void mlx5e_update_stats(struct mlx5e_priv *priv); + +int mlx5e_create_flow_tables(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv); +void mlx5e_init_eth_addr(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); + +int mlx5e_open_locked(struct net_device *netdev); +int mlx5e_close_locked(struct net_device *netdev); + +static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, + struct mlx5e_tx_wqe *wqe, int bf_sz) +{ + u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; + + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *sq->wq.db = cpu_to_be32(sq->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + if (bf_sz) { + __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz); + + /* flush the write-combining mapped buffer */ + wmb(); + + } else { + mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + } + + sq->bf_offset ^= sq->bf_buf_size; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); +} + +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; +u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c new file mode 100644 index 000000000..2e022e900 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + sizeof(drvinfo->bus_info)); +} + +static const struct { + u32 supported; + u32 advertised; + u32 speed; +} ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = { + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + [MLX5E_1000BASE_KX] = { + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + [MLX5E_10GBASE_CX4] = { + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_KX4] = { + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_KR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_20GBASE_KR2] = { + .supported = SUPPORTED_20000baseKR2_Full, + .advertised = ADVERTISED_20000baseKR2_Full, + .speed = 20000, + }, + [MLX5E_40GBASE_CR4] = { + .supported = SUPPORTED_40000baseCR4_Full, + .advertised = ADVERTISED_40000baseCR4_Full, + .speed = 40000, + }, + [MLX5E_40GBASE_KR4] = { + .supported = SUPPORTED_40000baseKR4_Full, + .advertised = ADVERTISED_40000baseKR4_Full, + .speed = 40000, + }, + [MLX5E_56GBASE_R4] = { + .supported = SUPPORTED_56000baseKR4_Full, + .advertised = ADVERTISED_56000baseKR4_Full, + .speed = 56000, + }, + [MLX5E_10GBASE_CR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_SR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_ER] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_40GBASE_SR4] = { + .supported = SUPPORTED_40000baseSR4_Full, + .advertised = ADVERTISED_40000baseSR4_Full, + .speed = 40000, + }, + [MLX5E_40GBASE_LR4] = { + .supported = SUPPORTED_40000baseLR4_Full, + .advertised = ADVERTISED_40000baseLR4_Full, + .speed = 40000, + }, + [MLX5E_100GBASE_CR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_SR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_KR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_LR4] = { + .speed = 100000, + }, + [MLX5E_100BASE_TX] = { + .speed = 100, + }, + [MLX5E_100BASE_T] = { + .supported = SUPPORTED_100baseT_Full, + .advertised = ADVERTISED_100baseT_Full, + .speed = 100, + }, + [MLX5E_10GBASE_T] = { + .supported = SUPPORTED_10000baseT_Full, + .advertised = ADVERTISED_10000baseT_Full, + .speed = 1000, + }, + [MLX5E_25GBASE_CR] = { + .speed = 25000, + }, + [MLX5E_25GBASE_KR] = { + .speed = 25000, + }, + [MLX5E_25GBASE_SR] = { + .speed = 25000, + }, + [MLX5E_50GBASE_CR2] = { + .speed = 50000, + }, + [MLX5E_50GBASE_KR2] = { + .speed = 50000, + }, +}; + +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + priv->params.num_channels * NUM_RQ_STATS + + priv->params.num_channels * priv->params.num_tc * + NUM_SQ_STATS; + /* fallthrough */ + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + int i, j, tc, idx = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + break; + + case ETH_SS_TEST: + break; + + case ETH_SS_STATS: + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_strings[i]); + + /* PPORT counters */ + for (i = 0; i < NUM_PPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_strings[i]); + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "rx%d_%s", i, rq_stats_strings[j]); + + for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->params.num_tc; tc++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + + (idx++) * ETH_GSTRING_LEN, + "tx%d_%d_%s", i, tc, + sq_stats_strings[j]); + break; + } +} + +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i, j, tc, idx = 0; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_stats(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = ((u64 *)&priv->stats.vport)[i]; + + for (i = 0; i < NUM_PPORT_COUNTERS; i++) + data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = !test_bit(MLX5E_STATE_OPENED, + &priv->state) ? 0 : + ((u64 *)&priv->channel[i]->rq.stats)[j]; + + for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->params.num_tc; tc++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = !test_bit(MLX5E_STATE_OPENED, + &priv->state) ? 0 : + ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; +} + +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + param->rx_pending = 1 << priv->params.log_rq_size; + param->tx_pending = 1 << priv->params.log_sq_size; +} + +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool was_opened; + u16 min_rx_wqes; + u8 log_rq_size; + u8 log_sq_size; + int err = 0; + + if (param->rx_jumbo_pending) { + netdev_info(dev, "%s: rx_jumbo_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_mini_pending) { + netdev_info(dev, "%s: rx_mini_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { + netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + if (param->tx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE)) { + netdev_info(dev, "%s: tx_pending (%d) > max (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + + log_rq_size = order_base_2(param->rx_pending); + log_sq_size = order_base_2(param->tx_pending); + min_rx_wqes = min_t(u16, param->rx_pending - 1, + MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + + if (log_rq_size == priv->params.log_rq_size && + log_sq_size == priv->params.log_sq_size && + min_rx_wqes == priv->params.min_rx_wqes) + return 0; + + mutex_lock(&priv->state_lock); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(dev); + + priv->params.log_rq_size = log_rq_size; + priv->params.log_sq_size = log_sq_size; + priv->params.min_rx_wqes = min_rx_wqes; + + if (was_opened) + err = mlx5e_open_locked(dev); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + ch->max_combined = mlx5e_get_max_num_channels(priv->mdev); + ch->combined_count = priv->params.num_channels; +} + +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int ncv = mlx5e_get_max_num_channels(priv->mdev); + unsigned int count = ch->combined_count; + bool was_opened; + int err = 0; + + if (!count) { + netdev_info(dev, "%s: combined_count=0 not supported\n", + __func__); + return -EINVAL; + } + if (ch->rx_count || ch->tx_count) { + netdev_info(dev, "%s: separate rx/tx count not supported\n", + __func__); + return -EINVAL; + } + if (count > ncv) { + netdev_info(dev, "%s: count (%d) > max (%d)\n", + __func__, count, ncv); + return -EINVAL; + } + + if (priv->params.num_channels == count) + return 0; + + mutex_lock(&priv->state_lock); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(dev); + + priv->params.num_channels = count; + + if (was_opened) + err = mlx5e_open_locked(dev); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; + coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; + coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; + coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; + + return 0; +} + +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channel *c; + int tc; + int i; + + priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; + priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; + priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; + priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; + + for (i = 0; i < priv->params.num_channels; ++i) { + c = priv->channel[i]; + + for (tc = 0; tc < c->num_tc; tc++) { + mlx5_core_modify_cq_moderation(mdev, + &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } + + return 0; +} + +static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) +{ + int i; + u32 supported_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_cap & MLX5E_PROT_MASK(i)) + supported_modes |= ptys2ethtool_table[i].supported; + } + return supported_modes; +} + +static u32 ptys2ethtool_adver_link(u32 eth_proto_cap) +{ + int i; + u32 advertising_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_cap & MLX5E_PROT_MASK(i)) + advertising_modes |= ptys2ethtool_table[i].advertised; + } + return advertising_modes; +} + +static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) +{ + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return SUPPORTED_FIBRE; + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + return SUPPORTED_Backplane; + } + return 0; +} + +static void get_speed_duplex(struct net_device *netdev, + u32 eth_proto_oper, + struct ethtool_cmd *cmd) +{ + int i; + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + + if (!netif_carrier_ok(netdev)) + goto out; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_oper & MLX5E_PROT_MASK(i)) { + speed = ptys2ethtool_table[i].speed; + duplex = DUPLEX_FULL; + break; + } + } +out: + ethtool_cmd_speed_set(cmd, speed); + cmd->duplex = duplex; +} + +static void get_supported(u32 eth_proto_cap, u32 *supported) +{ + *supported |= ptys2ethtool_supported_port(eth_proto_cap); + *supported |= ptys2ethtool_supported_link(eth_proto_cap); + *supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; +} + +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, + u8 rx_pause, u32 *advertising) +{ + *advertising |= ptys2ethtool_adver_link(eth_proto_cap); + *advertising |= tx_pause ? ADVERTISED_Pause : 0; + *advertising |= (tx_pause ^ rx_pause) ? ADVERTISED_Asym_Pause : 0; +} + +static u8 get_connector_port(u32 eth_proto) +{ + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; + } + + return PORT_OTHER; +} + +static void get_lp_advertising(u32 eth_proto_lp, u32 *lp_advertising) +{ + *lp_advertising = ptys2ethtool_adver_link(eth_proto_lp); +} + +static int mlx5e_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_lp; + u32 eth_proto_oper; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + + if (err) { + netdev_err(netdev, "%s: query port ptys failed: %d\n", + __func__, err); + goto err_query_ptys; + } + + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + + cmd->supported = 0; + cmd->advertising = 0; + + get_supported(eth_proto_cap, &cmd->supported); + get_advertising(eth_proto_admin, 0, 0, &cmd->advertising); + get_speed_duplex(netdev, eth_proto_oper, cmd); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + + cmd->port = get_connector_port(eth_proto_oper); + get_lp_advertising(eth_proto_lp, &cmd->lp_advertising); + + cmd->transceiver = XCVR_INTERNAL; + +err_query_ptys: + return err; +} + +static u32 mlx5e_ethtool2ptys_adver_link(u32 link_modes) +{ + u32 i, ptys_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (ptys2ethtool_table[i].advertised & link_modes) + ptys_modes |= MLX5E_PROT_MASK(i); + } + + return ptys_modes; +} + +static u32 mlx5e_ethtool2ptys_speed_link(u32 speed) +{ + u32 i, speed_links = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (ptys2ethtool_table[i].speed == speed) + speed_links |= MLX5E_PROT_MASK(i); + } + + return speed_links; +} + +static int mlx5e_set_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 link_modes; + u32 speed; + u32 eth_proto_cap, eth_proto_admin; + enum mlx5_port_status ps; + int err; + + speed = ethtool_cmd_speed(cmd); + + link_modes = cmd->autoneg == AUTONEG_ENABLE ? + mlx5e_ethtool2ptys_adver_link(cmd->advertising) : + mlx5e_ethtool2ptys_speed_link(speed); + + err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + __func__, err); + goto out; + } + + link_modes = link_modes & eth_proto_cap; + if (!link_modes) { + netdev_err(netdev, "%s: Not supported link mode(s) requested", + __func__); + err = -EINVAL; + goto out; + } + + err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", + __func__, err); + goto out; + } + + if (link_modes == eth_proto_admin) + goto out; + + mlx5_query_port_admin_status(mdev, &ps); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + +out: + return err; +} + +static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return sizeof(priv->params.toeplitz_hash_key); +} + +static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) +{ + return MLX5E_INDIR_RQT_SIZE; +} + +static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (indir) + memcpy(indir, priv->params.indirection_rqt, + sizeof(priv->params.indirection_rqt)); + + if (key) + memcpy(key, priv->params.toeplitz_hash_key, + sizeof(priv->params.toeplitz_hash_key)); + + if (hfunc) + *hfunc = priv->params.rss_hfunc; + + return 0; +} + +static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool close_open; + int err = 0; + + if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && + (hfunc != ETH_RSS_HASH_XOR) && + (hfunc != ETH_RSS_HASH_TOP)) + return -EINVAL; + + mutex_lock(&priv->state_lock); + + if (indir) { + memcpy(priv->params.indirection_rqt, indir, + sizeof(priv->params.indirection_rqt)); + mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); + } + + close_open = (key || (hfunc != ETH_RSS_HASH_NO_CHANGE)) && + test_bit(MLX5E_STATE_OPENED, &priv->state); + if (close_open) + mlx5e_close_locked(dev); + + if (key) + memcpy(priv->params.toeplitz_hash_key, key, + sizeof(priv->params.toeplitz_hash_key)); + + if (hfunc != ETH_RSS_HASH_NO_CHANGE) + priv->params.rss_hfunc = hfunc; + + if (close_open) + err = mlx5e_open_locked(priv->netdev); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = priv->params.num_channels; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int mlx5e_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + void *data) +{ + const struct mlx5e_priv *priv = netdev_priv(dev); + int err = 0; + + switch (tuna->id) { + case ETHTOOL_TX_COPYBREAK: + *(u32 *)data = priv->params.tx_max_inline; + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static int mlx5e_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + bool was_opened; + u32 val; + int err = 0; + + switch (tuna->id) { + case ETHTOOL_TX_COPYBREAK: + val = *(u32 *)data; + if (val > mlx5e_get_max_inline_cap(mdev)) { + err = -EINVAL; + break; + } + + mutex_lock(&priv->state_lock); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(dev); + + priv->params.tx_max_inline = val; + + if (was_opened) + err = mlx5e_open_locked(dev); + + mutex_unlock(&priv->state_lock); + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, + &pauseparam->tx_pause); + if (err) { + netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + __func__, err); + } +} + +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (pauseparam->autoneg) + return -EINVAL; + + err = mlx5_set_port_pause(mdev, + pauseparam->rx_pause ? 1 : 0, + pauseparam->tx_pause ? 1 : 0); + if (err) { + netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + __func__, err); + } + + return err; +} + +const struct ethtool_ops mlx5e_ethtool_ops = { + .get_drvinfo = mlx5e_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_get_strings, + .get_sset_count = mlx5e_get_sset_count, + .get_ethtool_stats = mlx5e_get_ethtool_stats, + .get_ringparam = mlx5e_get_ringparam, + .set_ringparam = mlx5e_set_ringparam, + .get_channels = mlx5e_get_channels, + .set_channels = mlx5e_set_channels, + .get_coalesce = mlx5e_get_coalesce, + .set_coalesce = mlx5e_set_coalesce, + .get_settings = mlx5e_get_settings, + .set_settings = mlx5e_set_settings, + .get_rxfh_key_size = mlx5e_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, + .get_rxnfc = mlx5e_get_rxnfc, + .get_tunable = mlx5e_get_tunable, + .set_tunable = mlx5e_set_tunable, + .get_pauseparam = mlx5e_get_pauseparam, + .set_pauseparam = mlx5e_set_pauseparam, +}; diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c new file mode 100644 index 000000000..22d603f78 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -0,0 +1,907 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/flow_table.h> +#include "en.h" + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_eth_addr_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_eth_addr_info ai; +}; + +static inline int mlx5e_hash_eth_addr(u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +{ + struct mlx5e_eth_addr_hash_node *hn; + int ix = mlx5e_hash_eth_addr(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai) +{ + void *ft = priv->ft.main; + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) + mlx5_del_flow_table_entry(ft, + ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) + mlx5_del_flow_table_entry(ft, + ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) + mlx5_del_flow_table_entry(ft, + ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) + mlx5_del_flow_table_entry(ft, + ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); + + if (ai->tt_vec & BIT(MLX5E_TT_ANY)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); +} + +static int mlx5e_get_eth_addr_type(u8 *addr) +{ + if (is_unicast_ether_addr(addr)) + return MLX5E_UC; + + if ((addr[0] == 0x01) && + (addr[1] == 0x00) && + (addr[2] == 0x5e) && + !(addr[3] & 0x80)) + return MLX5E_MC_IPV4; + + if ((addr[0] == 0x33) && + (addr[1] == 0x33)) + return MLX5E_MC_IPV6; + + return MLX5E_MC_OTHER; +} + +static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) +{ + int eth_addr_type; + u32 ret; + + switch (type) { + case MLX5E_FULLMATCH: + eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); + switch (eth_addr_type) { + case MLX5E_UC: + ret = + BIT(MLX5E_TT_IPV4_TCP) | + BIT(MLX5E_TT_IPV6_TCP) | + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4_IPSEC_AH) | + BIT(MLX5E_TT_IPV6_IPSEC_AH) | + BIT(MLX5E_TT_IPV4_IPSEC_ESP) | + BIT(MLX5E_TT_IPV6_IPSEC_ESP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + + case MLX5E_MC_IPV4: + ret = + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV4) | + 0; + break; + + case MLX5E_MC_IPV6: + ret = + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV6) | + 0; + break; + + case MLX5E_MC_OTHER: + ret = + BIT(MLX5E_TT_ANY) | + 0; + break; + } + + break; + + case MLX5E_ALLMULTI: + ret = + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + + default: /* MLX5E_PROMISC */ + ret = + BIT(MLX5E_TT_IPV4_TCP) | + BIT(MLX5E_TT_IPV6_TCP) | + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4_IPSEC_AH) | + BIT(MLX5E_TT_IPV6_IPSEC_AH) | + BIT(MLX5E_TT_IPV4_IPSEC_ESP) | + BIT(MLX5E_TT_IPV6_IPSEC_ESP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + } + + return ret; +} + +static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type, + void *flow_context, void *match_criteria) +{ + u8 match_criteria_enable = 0; + void *match_value; + void *dest; + u8 *dmac; + u8 *match_criteria_dmac; + void *ft = priv->ft.main; + u32 *tirn = priv->tirn; + u32 *ft_ix; + u32 tt_vec; + int err; + + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dmac = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + memset(match_criteria_dmac, 0xff, ETH_ALEN); + ether_addr_copy(dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + match_criteria_dmac[0] = 0x01; + dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + tt_vec = mlx5e_get_tt_vec(ai, type); + + ft_ix = &ai->ft_ix[MLX5E_TT_ANY]; + if (tt_vec & BIT(MLX5E_TT_ANY)) { + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_ANY]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_ANY); + } + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ethertype); + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV4]; + if (tt_vec & BIT(MLX5E_TT_IPV4)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV4); + } + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV6]; + if (tt_vec & BIT(MLX5E_TT_IPV6)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6); + } + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ip_protocol); + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_UDP); + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_UDP]; + if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); + } + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_UDP]; + if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); + } + + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_TCP); + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_TCP]; + if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); + } + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_TCP]; + if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); + } + + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_AH); + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]; + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_IPSEC_AH]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); + } + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]; + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_IPSEC_AH]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); + } + + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_ESP); + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]; + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_IPSEC_ESP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); + } + + ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]; + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_IPSEC_ESP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + ft_ix); + if (err) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); + } + + return 0; + +err_del_ai: + mlx5e_del_eth_addr_from_flow_table(priv, ai); + + return err; +} + +static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type) +{ + u32 *flow_context; + u32 *match_criteria; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_eth_addr_rule_out; + } + + err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context, + match_criteria); + if (err) + netdev_err(priv->netdev, "%s: failed\n", __func__); + +add_eth_addr_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_VID, +}; + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u8 match_criteria_enable = 0; + u32 *flow_context; + void *match_value; + void *dest; + u32 *match_criteria; + u32 *ft_ix; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); + MLX5_SET(dest_format_struct, dest, destination_id, + mlx5_get_flow_table_id(priv->ft.main)); + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.vlan_tag); + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + ft_ix = &priv->vlan.untagged_rule_ft_ix; + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + ft_ix = &priv->vlan.any_vlan_rule_ft_ix; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, + vid); + break; + } + + err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, + match_criteria, flow_context, ft_ix); + if (err) + netdev_err(priv->netdev, "%s: failed\n", __func__); + +add_vlan_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return err; +} + +static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.untagged_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.any_vlan_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.active_vlans_ft_ix[vid]); + break; + } +} + +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +{ + if (!priv->vlan.filter_disabled) + return; + + priv->vlan.filter_disabled = false; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +{ + if (priv->vlan.filter_disabled) + return; + + priv->vlan.filter_disabled = true; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + + return 0; +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_action(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_hash_node *hn) +{ + switch (hn->action) { + case MLX5E_ACTION_ADD: + mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + mlx5e_del_eth_addr_from_hash(hn); + break; + } +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, + priv->netdev->dev_addr); + + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_execute_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_execute_action(priv, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_sync_netdev_addr(priv); + + mlx5e_apply_netdev_addr(priv); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct net_device *ndev = priv->netdev; + + bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + if (enable_promisc) { + mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->vlan.filter_disabled) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } + if (enable_allmulti) + mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(priv); + + if (disable_broadcast) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + if (disable_promisc) { + if (!priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + } + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; +} + +void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +{ + ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); +} + +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + u8 *dmac; + + g = kcalloc(9, sizeof(*g), GFP_KERNEL); + if (!g) + return -ENOMEM; + + g[0].log_sz = 3; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ip_protocol); + + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.ethertype); + + g[2].log_sz = 0; + + g[3].log_sz = 14; + g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ip_protocol); + + g[4].log_sz = 13; + g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, + outer_headers.ethertype); + + g[5].log_sz = 11; + g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + + g[6].log_sz = 2; + g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ip_protocol); + + g[7].log_sz = 1; + g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, + outer_headers.ethertype); + + g[8].log_sz = 0; + g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 9, g); + kfree(g); + + return priv->ft.main ? 0 : -ENOMEM; +} + +static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.main); +} + +static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + + g = kcalloc(2, sizeof(*g), GFP_KERNEL); + if (!g) + return -ENOMEM; + + g[0].log_sz = 12; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.first_vid); + + /* untagged + any vlan id */ + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.vlan_tag); + + priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 2, g); + + kfree(g); + return priv->ft.vlan ? 0 : -ENOMEM; +} + +static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.vlan); +} + +int mlx5e_create_flow_tables(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_main_flow_table(priv); + if (err) + return err; + + err = mlx5e_create_vlan_flow_table(priv); + if (err) + goto err_destroy_main_flow_table; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + goto err_destroy_vlan_flow_table; + + return 0; + +err_destroy_vlan_flow_table: + mlx5e_destroy_vlan_flow_table(priv); + +err_destroy_main_flow_table: + mlx5e_destroy_main_flow_table(priv); + + return err; +} + +void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_main_flow_table(priv); +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_main.c new file mode 100644 index 000000000..1203d892e --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -0,0 +1,2276 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/flow_table.h> +#include "en.h" + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + u16 max_inline; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; +}; + +static void mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 port_state; + + port_state = mlx5_query_vport_state(mdev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + + if (port_state == VPORT_STATE_UP) + netif_carrier_on(priv->netdev); + else + netif_carrier_off(priv->netdev); +} + +static void mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_carrier(priv); + mutex_unlock(&priv->state_lock); +} + +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *s = &priv->stats.pport; + u32 *in; + u32 *out; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + in = mlx5_vzalloc(sz); + out = mlx5_vzalloc(sz); + if (!in || !out) + goto free_out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + memcpy(s->IEEE_802_3_counters, + MLX5_ADDR_OF(ppcnt_reg, out, counter_set), + sizeof(s->IEEE_802_3_counters)); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + memcpy(s->RFC_2863_counters, + MLX5_ADDR_OF(ppcnt_reg, out, counter_set), + sizeof(s->RFC_2863_counters)); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + memcpy(s->RFC_2819_counters, + MLX5_ADDR_OF(ppcnt_reg, out, counter_set), + sizeof(s->RFC_2819_counters)); + +free_out: + kvfree(in); + kvfree(out); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u64 tx_offload_none; + int i, j; + + out = mlx5_vzalloc(outlen); + if (!out) + return; + + /* Collect firts the SW counters and then HW for consistency */ + s->tso_packets = 0; + s->tso_bytes = 0; + s->tx_queue_stopped = 0; + s->tx_queue_wake = 0; + s->tx_queue_dropped = 0; + tx_offload_none = 0; + s->lro_packets = 0; + s->lro_bytes = 0; + s->rx_csum_none = 0; + s->rx_csum_sw = 0; + s->rx_wqe_err = 0; + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->lro_packets += rq_stats->lro_packets; + s->lro_bytes += rq_stats->lro_bytes; + s->rx_csum_none += rq_stats->csum_none; + s->rx_csum_sw += rq_stats->csum_sw; + s->rx_wqe_err += rq_stats->wqe_err; + + for (j = 0; j < priv->params.num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tso_packets += sq_stats->tso_packets; + s->tso_bytes += sq_stats->tso_bytes; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + tx_offload_none += sq_stats->csum_offload_none; + } + } + + /* HW counters */ + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + + memset(out, 0, outlen); + + if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + goto free_out; + +#define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + s->rx_error_packets = + MLX5_GET_CTR(out, received_errors.packets); + s->rx_error_bytes = + MLX5_GET_CTR(out, received_errors.octets); + s->tx_error_packets = + MLX5_GET_CTR(out, transmit_errors.packets); + s->tx_error_bytes = + MLX5_GET_CTR(out, transmit_errors.octets); + + s->rx_unicast_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets); + s->rx_unicast_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets); + s->tx_unicast_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets); + s->tx_unicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets); + + s->rx_multicast_packets = + MLX5_GET_CTR(out, received_eth_multicast.packets); + s->rx_multicast_bytes = + MLX5_GET_CTR(out, received_eth_multicast.octets); + s->tx_multicast_packets = + MLX5_GET_CTR(out, transmitted_eth_multicast.packets); + s->tx_multicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_multicast.octets); + + s->rx_broadcast_packets = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + s->rx_broadcast_bytes = + MLX5_GET_CTR(out, received_eth_broadcast.octets); + s->tx_broadcast_packets = + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + s->tx_broadcast_bytes = + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + s->rx_packets = + s->rx_unicast_packets + + s->rx_multicast_packets + + s->rx_broadcast_packets; + s->rx_bytes = + s->rx_unicast_bytes + + s->rx_multicast_bytes + + s->rx_broadcast_bytes; + s->tx_packets = + s->tx_unicast_packets + + s->tx_multicast_packets + + s->tx_broadcast_packets; + s->tx_bytes = + s->tx_unicast_bytes + + s->tx_multicast_bytes + + s->tx_broadcast_bytes; + + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none; + s->rx_csum_good = s->rx_packets - s->rx_csum_none - + s->rx_csum_sw; + + mlx5e_update_pport_counters(priv); +free_out: + kvfree(out); +} + +static void mlx5e_update_stats_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, + update_stats_work); + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_update_stats(priv); + schedule_delayed_work(dwork, + msecs_to_jiffies( + MLX5E_UPDATE_STATS_INTERVAL)); + } + mutex_unlock(&priv->state_lock); +} + +static void __mlx5e_async_event(struct mlx5e_priv *priv, + enum mlx5_dev_event event) +{ + switch (event) { + case MLX5_DEV_EVENT_PORT_UP: + case MLX5_DEV_EVENT_PORT_DOWN: + schedule_work(&priv->update_carrier_work); + break; + + default: + break; + } +} + +static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, + enum mlx5_dev_event event, unsigned long param) +{ + struct mlx5e_priv *priv = vpriv; + + spin_lock(&priv->async_events_spinlock); + if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + __mlx5e_async_event(priv, event); + spin_unlock(&priv->async_events_spinlock); +} + +static void mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); +} + +static void mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + spin_lock_irq(&priv->async_events_spinlock); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + spin_unlock_irq(&priv->async_events_spinlock); +} + +#define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) + +static int mlx5e_create_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int wq_sz; + int err; + int i; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + + err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, + &rq->wq_ctrl); + if (err) + return err; + + rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + + rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; + + wqe->data.lkey = c->mkey_be; + wqe->data.byte_count = + cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + } + + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->channel = c; + rq->ix = c->ix; + rq->priv = c->priv; + + return 0; + +err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); + + return err; +} + +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + kfree(rq->skb); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +{ + struct mlx5e_priv *priv = rq->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&rq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_disable_rq(struct mlx5e_rq *rq) +{ + mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); +} + +static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(20000); + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + + while (time_before(jiffies, exp_time)) { + if (wq->cur_sz >= priv->params.min_rx_wqes) + return 0; + + msleep(20); + } + + return -ETIMEDOUT; +} + +static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + int err; + + err = mlx5e_create_rq(c, param, rq); + if (err) + return err; + + err = mlx5e_enable_rq(rq, param); + if (err) + goto err_destroy_rq; + + err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_disable_rq; + + set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */ + + return 0; + +err_disable_rq: + mlx5e_disable_rq(rq); +err_destroy_rq: + mlx5e_destroy_rq(rq); + + return err; +} + +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ + + mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + while (!mlx5_wq_ll_is_empty(&rq->wq)) + msleep(20); + + /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ + napi_synchronize(&rq->channel->napi); + + mlx5e_disable_rq(rq); + mlx5e_destroy_rq(rq); +} + +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + kfree(sq->dma_fifo); + kfree(sq->skb); +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); + sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, + numa); + + if (!sq->skb || !sq->dma_fifo) { + mlx5e_free_sq_db(sq); + return -ENOMEM; + } + + sq->dma_fifo_mask = df_sz - 1; + + return 0; +} + +static int mlx5e_create_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *sqc = param->sqc; + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + int txq_ix; + int err; + + err = mlx5_alloc_map_uar(mdev, &sq->uar); + if (err) + return err; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, + &sq->wq_ctrl); + if (err) + goto err_unmap_free_uar; + + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + sq->uar_map = sq->uar.map; + sq->uar_bf_map = sq->uar.bf_map; + sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; + sq->max_inline = param->max_inline; + + err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + txq_ix = c->ix + tc * priv->params.num_channels; + sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + sq->bf_budget = MLX5E_SQ_BF_BUDGET; + priv->txq_to_sq_map[txq_ix] = sq; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &sq->uar); + + return err; +} + +static void mlx5e_destroy_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + + mlx5e_free_sq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); + mlx5_unmap_free_uar(priv->mdev, &sq->uar); +} + +static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + + MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_disable_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_core_destroy_sq(mdev, sq->sqn); +} + +static int mlx5e_open_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + int err; + + err = mlx5e_create_sq(c, tc, param, sq); + if (err) + return err; + + err = mlx5e_enable_sq(sq, param); + if (err) + goto err_destroy_sq; + + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + if (err) + goto err_disable_sq; + + set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + + return 0; + +err_disable_sq: + mlx5e_disable_sq(sq); +err_destroy_sq: + mlx5e_destroy_sq(sq); + + return err; +} + +static inline void netif_tx_disable_queue(struct netdev_queue *txq) +{ + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); +} + +static void mlx5e_close_sq(struct mlx5e_sq *sq) +{ + clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ + netif_tx_disable_queue(sq->txq); + + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq, true); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + while (sq->cc != sq->pc) /* wait till sq is empty */ + msleep(20); + + /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ + napi_synchronize(&sq->channel->napi); + + mlx5e_disable_sq(sq); + mlx5e_destroy_sq(sq); +} + +static int mlx5e_create_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int eqn_not_used; + unsigned int irqn; + int err; + u32 i; + + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); + + cq->napi = &c->napi; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + mcq->irqn = irqn; + mcq->uar = &priv->cq_uar; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->channel = c; + cq->priv = priv; + + return 0; +} + +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +{ + struct mlx5e_priv *priv = cq->priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + + void *in; + void *cqc; + int inlen; + unsigned int irqn_not_used; + int eqn; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, param->cqc, sizeof(param->cqc)); + + mlx5_fill_page_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen); + + kvfree(in); + + if (err) + return err; + + mlx5e_cq_arm(cq); + + return 0; +} + +static void mlx5e_disable_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_priv *priv = cq->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_core_destroy_cq(mdev, &cq->mcq); +} + +static int mlx5e_open_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq, + u16 moderation_usecs, + u16 moderation_frames) +{ + int err; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + err = mlx5e_create_cq(c, param, cq); + if (err) + return err; + + err = mlx5e_enable_cq(cq, param); + if (err) + goto err_destroy_cq; + + err = mlx5_core_modify_cq_moderation(mdev, &cq->mcq, + moderation_usecs, + moderation_frames); + if (err) + goto err_destroy_cq; + + return 0; + +err_destroy_cq: + mlx5e_destroy_cq(cq); + + return err; +} + +static void mlx5e_close_cq(struct mlx5e_cq *cq) +{ + mlx5e_disable_cq(cq); + mlx5e_destroy_cq(cq); +} + +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) +{ + return cpumask_first(priv->mdev->priv.irq_info[ix].mask); +} + +static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_channel_param *cparam) +{ + struct mlx5e_priv *priv = c->priv; + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, + priv->params.tx_cq_moderation_usec, + priv->params.tx_cq_moderation_pkts); + if (err) + goto err_close_tx_cqs; + } + + return 0; + +err_close_tx_cqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_cq(&c->sq[tc].cq); + + return err; +} + +static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->sq[tc].cq); +} + +static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_channel_param *cparam) +{ + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); + if (err) + goto err_close_sqs; + } + + return 0; + +err_close_sqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_sq(&c->sq[tc]); + + return err; +} + +static void mlx5e_close_sqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_sq(&c->sq[tc]); +} + +static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) +{ + int i; + + for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + priv->channeltc_to_txq_map[ix][i] = + ix + i * priv->params.num_channels; +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_channel_param *cparam, + struct mlx5e_channel **cp) +{ + struct net_device *netdev = priv->netdev; + int cpu = mlx5e_get_cpu(priv, ix); + struct mlx5e_channel *c; + int err; + + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->ix = ix; + c->cpu = cpu; + c->pdev = &priv->mdev->pdev->dev; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mr.key); + c->num_tc = priv->params.num_tc; + + mlx5e_build_channeltc_to_txq_map(priv, ix); + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_napi_del; + + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, + priv->params.rx_cq_moderation_usec, + priv->params.rx_cq_moderation_pkts); + if (err) + goto err_close_tx_cqs; + + napi_enable(&c->napi); + + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_disable_napi; + + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); + if (err) + goto err_close_sqs; + + netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); + *cp = c; + + return 0; + +err_close_sqs: + mlx5e_close_sqs(c); + +err_disable_napi: + napi_disable(&c->napi); + mlx5e_close_cq(&c->rq.cq); + +err_close_tx_cqs: + mlx5e_close_tx_cqs(c); + +err_napi_del: + netif_napi_del(&c->napi); + kfree(c); + + return err; +} + +static void mlx5e_close_channel(struct mlx5e_channel *c) +{ + mlx5e_close_rq(&c->rq); + mlx5e_close_sqs(c); + napi_disable(&c->napi); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_tx_cqs(c); + netif_napi_del(&c->napi); + kfree(c); +} + +static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); + MLX5_SET(wq, wq, pd, priv->pdn); + + param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); + param->wq.linear = 1; +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, priv->pdn); + + param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); + param->max_inline = priv->params.tx_max_inline; +} + +static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); +} + +static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_channel_param *cparam) +{ + memset(cparam, 0, sizeof(*cparam)); + + mlx5e_build_rq_param(priv, &cparam->rq); + mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); +} + +static int mlx5e_open_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channel_param cparam; + int nch = priv->params.num_channels; + int err = -ENOMEM; + int i; + int j; + + priv->channel = kcalloc(nch, sizeof(struct mlx5e_channel *), + GFP_KERNEL); + + priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, + sizeof(struct mlx5e_sq *), GFP_KERNEL); + + if (!priv->channel || !priv->txq_to_sq_map) + goto err_free_txq_to_sq_map; + + mlx5e_build_channel_param(priv, &cparam); + for (i = 0; i < nch; i++) { + err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); + if (err) + goto err_close_channels; + } + + for (j = 0; j < nch; j++) { + err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); + if (err) + goto err_close_channels; + } + + return 0; + +err_close_channels: + for (i--; i >= 0; i--) + mlx5e_close_channel(priv->channel[i]); + +err_free_txq_to_sq_map: + kfree(priv->txq_to_sq_map); + kfree(priv->channel); + + return err; +} + +static void mlx5e_close_channels(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_close_channel(priv->channel[i]); + + kfree(priv->txq_to_sq_map); + kfree(priv->channel); +} + +static int mlx5e_rx_hash_fn(int hfunc) +{ + return (hfunc == ETH_RSS_HASH_TOP) ? + MLX5_RX_HASH_FN_TOEPLITZ : + MLX5_RX_HASH_FN_INVERTED_XOR8; +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) +{ + int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { + int ix = i; + + if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); + + ix = priv->params.indirection_rqt[ix]; + ix = ix % priv->params.num_channels; + MLX5_SET(rqtc, rqtc, rq_num[i], + test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn); + } +} + +static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, void *rqtc, + enum mlx5e_rqt_ix rqt_ix) +{ + + switch (rqt_ix) { + case MLX5E_INDIRECTION_RQT: + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + + break; + + default: /* MLX5E_SINGLE_RQ_RQT */ + MLX5_SET(rqtc, rqtc, rq_num[0], + test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[0]->rq.rqn : + priv->drop_rq.rqn); + + break; + } +} + +static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + void *rqtc; + int inlen; + int sz; + int err; + + sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + + err = mlx5_core_create_rqt(mdev, in, inlen, &priv->rqtn[rqt_ix]); + + kvfree(in); + + return err; +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + void *rqtc; + int inlen; + int sz; + int err; + + sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + + mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + + err = mlx5_core_modify_rqt(mdev, priv->rqtn[rqt_ix], in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +{ + mlx5_core_destroy_rqt(priv->mdev, priv->rqtn[rqt_ix]); +} + +static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) +{ + mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_redirect_rqt(priv, MLX5E_SINGLE_RQ_RQT); +} + +static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) +{ + if (!priv->params.lro_en) + return; + +#define ROUGH_MAX_L2_L3_HDR_SZ 256 + + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (priv->params.lro_wqe_sz - + ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, + MLX5_CAP_ETH(priv->mdev, + lro_timer_supported_periods[2])); +} + +static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *tirc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.lro, 1); + tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + + mlx5e_build_tir_ctx_lro(tirc, priv); + + err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, + u32 tirn) +{ + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, + priv->tirn[i]); + if (err) + return err; + } + + return 0; +} + +static int mlx5e_set_dev_port_mtu(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int hw_mtu; + int err; + + err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1); + if (err) + return err; + + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + if (MLX5E_HW2SW_MTU(hw_mtu) != netdev->mtu) + netdev_warn(netdev, "%s: Port MTU %d is different than netdev mtu %d\n", + __func__, MLX5E_HW2SW_MTU(hw_mtu), netdev->mtu); + + netdev->mtu = MLX5E_HW2SW_MTU(hw_mtu); + return 0; +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int num_txqs; + int err; + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + num_txqs = priv->params.num_channels * priv->params.num_tc; + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->params.num_channels); + + err = mlx5e_set_dev_port_mtu(netdev); + if (err) + goto err_clear_state_opened_flag; + + err = mlx5e_open_channels(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", + __func__, err); + goto err_clear_state_opened_flag; + } + + err = mlx5e_refresh_tirs_self_loopback_enable(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", + __func__, err); + goto err_close_channels; + } + + mlx5e_update_carrier(priv); + mlx5e_redirect_rqts(priv); + + schedule_delayed_work(&priv->update_stats_work, 0); + + return 0; + +err_close_channels: + mlx5e_close_channels(priv); +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + return err; +} + +static int mlx5e_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_close_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + mlx5e_redirect_rqts(priv); + netif_carrier_off(priv->netdev); + mlx5e_close_channels(priv); + + return 0; +} + +static int mlx5e_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int err; + + param->wq.db_numa_node = param->wq.buf_numa_node; + + err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, + &rq->wq_ctrl); + if (err) + return err; + + rq->priv = priv; + + return 0; +} + +static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int eqn_not_used; + unsigned int irqn; + int err; + + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + mcq->irqn = irqn; + mcq->uar = &priv->cq_uar; + + cq->priv = priv; + + return 0; +} + +static int mlx5e_open_drop_rq(struct mlx5e_priv *priv) +{ + struct mlx5e_cq_param cq_param; + struct mlx5e_rq_param rq_param; + struct mlx5e_rq *rq = &priv->drop_rq; + struct mlx5e_cq *cq = &priv->drop_rq.cq; + int err; + + memset(&cq_param, 0, sizeof(cq_param)); + memset(&rq_param, 0, sizeof(rq_param)); + mlx5e_build_rx_cq_param(priv, &cq_param); + mlx5e_build_rq_param(priv, &rq_param); + + err = mlx5e_create_drop_cq(priv, cq, &cq_param); + if (err) + return err; + + err = mlx5e_enable_cq(cq, &cq_param); + if (err) + goto err_destroy_cq; + + err = mlx5e_create_drop_rq(priv, rq, &rq_param); + if (err) + goto err_disable_cq; + + err = mlx5e_enable_rq(rq, &rq_param); + if (err) + goto err_destroy_rq; + + return 0; + +err_destroy_rq: + mlx5e_destroy_rq(&priv->drop_rq); + +err_disable_cq: + mlx5e_disable_cq(&priv->drop_rq.cq); + +err_destroy_cq: + mlx5e_destroy_cq(&priv->drop_rq.cq); + + return err; +} + +static void mlx5e_close_drop_rq(struct mlx5e_priv *priv) +{ + mlx5e_disable_rq(&priv->drop_rq); + mlx5e_destroy_rq(&priv->drop_rq); + mlx5e_disable_cq(&priv->drop_rq.cq); + mlx5e_destroy_cq(&priv->drop_rq.cq); +} + +static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + memset(in, 0, sizeof(in)); + + MLX5_SET(tisc, tisc, prio, tc); + MLX5_SET(tisc, tisc, transport_domain, priv->tdn); + + return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); +} + +static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) +{ + mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); +} + +static int mlx5e_create_tises(struct mlx5e_priv *priv) +{ + int err; + int tc; + + for (tc = 0; tc < priv->params.num_tc; tc++) { + err = mlx5e_create_tis(priv, tc); + if (err) + goto err_close_tises; + } + + return 0; + +err_close_tises: + for (tc--; tc >= 0; tc--) + mlx5e_destroy_tis(priv, tc); + + return err; +} + +static void mlx5e_destroy_tises(struct mlx5e_priv *priv) +{ + int tc; + + for (tc = 0; tc < priv->params.num_tc; tc++) + mlx5e_destroy_tis(priv, tc); +} + +static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) +{ + void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + + MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) + +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) + +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + + mlx5e_build_tir_ctx_lro(tirc, priv); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + + switch (tt) { + case MLX5E_TT_ANY: + MLX5_SET(tirc, tirc, indirect_table, + priv->rqtn[MLX5E_SINGLE_RQ_RQT]); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); + break; + default: + MLX5_SET(tirc, tirc, indirect_table, + priv->rqtn[MLX5E_INDIRECTION_RQT]); + MLX5_SET(tirc, tirc, rx_hash_fn, + mlx5e_rx_hash_fn(priv->params.rss_hfunc)); + if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, + rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, + rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, priv->params.toeplitz_hash_key, len); + } + break; + } + + switch (tt) { + case MLX5E_TT_IPV4_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + + case MLX5E_TT_IPV6: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + } +} + +static int mlx5e_create_tir(struct mlx5e_priv *priv, int tt) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + void *tirc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + mlx5e_build_tir_ctx(priv, tirc, tt); + + err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); + + kvfree(in); + + return err; +} + +static void mlx5e_destroy_tir(struct mlx5e_priv *priv, int tt) +{ + mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); +} + +static int mlx5e_create_tirs(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_create_tir(priv, i); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv, i); + + return err; +} + +static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) + mlx5e_destroy_tir(priv, i); +} + +static struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + + stats->rx_packets = vstats->rx_packets; + stats->rx_bytes = vstats->rx_bytes; + stats->tx_packets = vstats->tx_packets; + stats->tx_bytes = vstats->tx_bytes; + stats->multicast = vstats->rx_multicast_packets + + vstats->tx_multicast_packets; + stats->tx_errors = vstats->tx_error_packets; + stats->rx_errors = vstats->rx_error_packets; + stats->tx_dropped = vstats->tx_queue_dropped; + stats->rx_crc_errors = 0; + stats->rx_length_errors = 0; + + return stats; +} + +static void mlx5e_set_rx_mode(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + schedule_work(&priv->set_rx_mode_work); +} + +static int mlx5e_set_mac(struct net_device *netdev, void *addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + netif_addr_lock_bh(netdev); + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + netif_addr_unlock_bh(netdev); + + schedule_work(&priv->set_rx_mode_work); + + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + netdev_features_t changes = features ^ netdev->features; + + mutex_lock(&priv->state_lock); + + if (changes & NETIF_F_LRO) { + bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + + if (was_opened) + mlx5e_close_locked(priv->netdev); + + priv->params.lro_en = !!(features & NETIF_F_LRO); + mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP); + mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP); + + if (was_opened) + err = mlx5e_open_locked(priv->netdev); + } + + mutex_unlock(&priv->state_lock); + + if (changes & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) + mlx5e_enable_vlan_filter(priv); + else + mlx5e_disable_vlan_filter(priv); + } + + return err; +} + +static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool was_opened; + int max_mtu; + int err = 0; + + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + + max_mtu = MLX5E_HW2SW_MTU(max_mtu); + + if (new_mtu > max_mtu) { + netdev_err(netdev, + "%s: Bad MTU (%d) > (%d) Max\n", + __func__, new_mtu, max_mtu); + return -EINVAL; + } + + mutex_lock(&priv->state_lock); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(netdev); + + netdev->mtu = new_mtu; + + if (was_opened) + err = mlx5e_open_locked(netdev); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static struct net_device_ops mlx5e_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_change_mtu = mlx5e_change_mtu, +}; + +static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -ENOTSUPP; + if (!MLX5_CAP_GEN(mdev, eth_net_offloads) || + !MLX5_CAP_GEN(mdev, nic_flow_table) || + !MLX5_CAP_ETH(mdev, csum_cap) || + !MLX5_CAP_ETH(mdev, max_lso_cap) || + !MLX5_CAP_ETH(mdev, vlan_cap) || + !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap) || + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.max_ft_level) + < 3) { + mlx5_core_warn(mdev, + "Not creating net device, some required device capabilities are missing\n"); + return -ENOTSUPP; + } + if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) + mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); + + return 0; +} + +u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) +{ + int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; + + return bf_buf_size - + sizeof(struct mlx5e_tx_wqe) + + 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; +} + +static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + int num_channels) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int i; + + priv->params.log_sq_size = + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + priv->params.log_rq_size = + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rx_cq_moderation_usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + priv->params.rx_cq_moderation_pkts = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + priv->params.tx_cq_moderation_usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + priv->params.tx_cq_moderation_pkts = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); + priv->params.min_rx_wqes = + MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; + priv->params.num_tc = 1; + priv->params.default_vlan_prio = 0; + priv->params.rss_hfunc = ETH_RSS_HASH_XOR; + + netdev_rss_key_fill(priv->params.toeplitz_hash_key, + sizeof(priv->params.toeplitz_hash_key)); + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + priv->params.indirection_rqt[i] = i % num_channels; + + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = num_channels; + priv->default_vlan_prio = priv->params.default_vlan_prio; + + spin_lock_init(&priv->async_events_spinlock); + mutex_init(&priv->state_lock); + + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); +} + +static void mlx5e_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + SET_NETDEV_DEV(netdev, &mdev->pdev->dev); + + if (priv->params.num_tc > 1) + mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + + netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_ethtool_ops; + + netdev->vlan_features |= NETIF_F_SG; + netdev->vlan_features |= NETIF_F_IP_CSUM; + netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_GRO; + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_RXCSUM; + netdev->vlan_features |= NETIF_F_RXHASH; + + if (!!MLX5_CAP_ETH(mdev, lro_cap)) + netdev->vlan_features |= NETIF_F_LRO; + + netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + netdev->features = netdev->hw_features; + if (!priv->params.lro_en) + netdev->features &= ~NETIF_F_LRO; + + netdev->features |= NETIF_F_HIGHDMA; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + mlx5e_set_netdev_dev_addr(netdev); +} + +static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, + struct mlx5_core_mr *mr) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + int nch = mlx5e_get_max_num_channels(mdev); + int err; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch, nch); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; + } + + mlx5e_build_netdev_priv(mdev, netdev, nch); + mlx5e_build_netdev(netdev); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); + if (err) { + mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); + goto err_free_netdev; + } + + err = mlx5_core_alloc_pd(mdev, &priv->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + goto err_unmap_free_uar; + } + + err = mlx5_alloc_transport_domain(mdev, &priv->tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(mdev, "create tises failed, %d\n", err); + goto err_destroy_mkey; + } + + err = mlx5e_open_drop_rq(priv); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_tises; + } + + err = mlx5e_create_rqt(priv, MLX5E_INDIRECTION_RQT); + if (err) { + mlx5_core_warn(mdev, "create rqt(INDIR) failed, %d\n", err); + goto err_close_drop_rq; + } + + err = mlx5e_create_rqt(priv, MLX5E_SINGLE_RQ_RQT); + if (err) { + mlx5_core_warn(mdev, "create rqt(SINGLE) failed, %d\n", err); + goto err_destroy_rqt_indir; + } + + err = mlx5e_create_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "create tirs failed, %d\n", err); + goto err_destroy_rqt_single; + } + + err = mlx5e_create_flow_tables(priv); + if (err) { + mlx5_core_warn(mdev, "create flow tables failed, %d\n", err); + goto err_destroy_tirs; + } + + mlx5e_init_eth_addr(priv); + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_destroy_flow_tables; + } + + mlx5e_enable_async_events(priv); + schedule_work(&priv->set_rx_mode_work); + + return priv; + +err_destroy_flow_tables: + mlx5e_destroy_flow_tables(priv); + +err_destroy_tirs: + mlx5e_destroy_tirs(priv); + +err_destroy_rqt_single: + mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); + +err_destroy_rqt_indir: + mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); + +err_close_drop_rq: + mlx5e_close_drop_rq(priv); + +err_destroy_tises: + mlx5e_destroy_tises(priv); + +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &priv->mr); + +err_dealloc_transport_domain: + mlx5_dealloc_transport_domain(mdev, priv->tdn); + +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, priv->pdn); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &priv->cq_uar); + +err_free_netdev: + free_netdev(netdev); + + return NULL; +} + +static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + + schedule_work(&priv->set_rx_mode_work); + mlx5e_disable_async_events(priv); + flush_scheduled_work(); + unregister_netdev(netdev); + mlx5e_destroy_flow_tables(priv); + mlx5e_destroy_tirs(priv); + mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); + mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_close_drop_rq(priv); + mlx5e_destroy_tises(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); + mlx5_core_dealloc_pd(priv->mdev, priv->pdn); + mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); + free_netdev(netdev); +} + +static void *mlx5e_get_netdev(void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + + return priv->netdev; +} + +static struct mlx5_interface mlx5e_interface = { + .add = mlx5e_create_netdev, + .remove = mlx5e_destroy_netdev, + .event = mlx5e_async_event, + .protocol = MLX5_INTERFACE_PROTOCOL_ETH, + .get_dev = mlx5e_get_netdev, +}; + +void mlx5e_init(void) +{ + mlx5_register_interface(&mlx5e_interface); +} + +void mlx5e_cleanup(void) +{ + mlx5_unregister_interface(&mlx5e_interface); +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c new file mode 100644 index 000000000..cf0098596 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include "en.h" + +static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct sk_buff *skb; + dma_addr_t dma_addr; + + skb = netdev_alloc_skb(rq->netdev, rq->wqe_sz); + if (unlikely(!skb)) + return -ENOMEM; + + dma_addr = dma_map_single(rq->pdev, + /* hw start padding */ + skb->data, + /* hw end padding */ + rq->wqe_sz, + DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) + goto err_free_skb; + + skb_reserve(skb, MLX5E_NET_IP_ALIGN); + + *((dma_addr_t *)skb->cb) = dma_addr; + wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); + + rq->skb[ix] = skb; + + return 0; + +err_free_skb: + dev_kfree_skb(skb); + + return -ENOMEM; +} + +bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state))) + return false; + + while (!mlx5_wq_ll_is_full(wq)) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, wq->head))) + break; + + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + } + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); + + return !mlx5_wq_ll_is_full(wq); +} + +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) +{ + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); + struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN); + struct tcphdr *tcp; + + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || + (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); + + u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETH_HLEN; + + if (eth->h_proto == htons(ETH_P_IP)) { + tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + sizeof(struct iphdr)); + ipv6 = NULL; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } else { + tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + sizeof(struct ipv6hdr)); + ipv4 = NULL; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + } + + if (get_cqe_lro_tcppsh(cqe)) + tcp->psh = 1; + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } + + if (ipv4) { + ipv4->ttl = cqe->lro_min_ttl; + ipv4->tot_len = cpu_to_be16(tot_len); + ipv4->check = 0; + ipv4->check = ip_fast_csum((unsigned char *)ipv4, + ipv4->ihl); + } else { + ipv6->hop_limit = cqe->lro_min_ttl; + ipv6->payload_len = cpu_to_be16(tot_len - + sizeof(struct ipv6hdr)); + } +} + +static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ + u8 cht = cqe->rss_hash_type; + int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 : + (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 : + PKT_HASH_TYPE_NONE; + skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); +} + +static inline bool is_first_ethertype_ip(struct sk_buff *skb) +{ + __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto; + + return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); +} + +static inline void mlx5e_handle_csum(struct net_device *netdev, + struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) + goto csum_none; + + if (likely(cqe->hds_ip_ext & CQE_L4_OK)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else if (is_first_ethertype_ip(skb)) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + rq->stats.csum_sw++; + } else { + goto csum_none; + } + + return; + +csum_none: + skb->ip_summed = CHECKSUM_NONE; + rq->stats.csum_none++; +} + +static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + struct net_device *netdev = rq->netdev; + u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + int lro_num_seg; + + skb_put(skb, cqe_bcnt); + + lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; + if (lro_num_seg > 1) { + mlx5e_lro_update_hdr(skb, cqe); + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + rq->stats.lro_packets++; + rq->stats.lro_bytes += cqe_bcnt; + } + + mlx5e_handle_csum(netdev, cqe, rq, skb); + + skb->protocol = eth_type_trans(skb, netdev); + + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + if (cqe_has_vlan(cqe)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->vlan_info)); +} + +bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +{ + struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + int i; + + /* avoid accessing cq (dma coherent memory) if not needed */ + if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) + return false; + + for (i = 0; i < budget; i++) { + struct mlx5e_rx_wqe *wqe; + struct mlx5_cqe64 *cqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + skb = rq->skb[wqe_counter]; + prefetch(skb->data); + rq->skb[wqe_counter] = NULL; + + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + dev_kfree_skb(skb); + goto wq_ll_pop; + } + + mlx5e_build_rx_skb(cqe, rq, skb); + rq->stats.packets++; + napi_gro_receive(cq->napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + if (i == budget) { + set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); + return true; + } + + return false; +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c new file mode 100644 index 000000000..1341b1d3c --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/tcp.h> +#include <linux/if_vlan.h> +#include "en.h" + +#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) + +void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); + + sq->skb[pi] = NULL; + sq->pc++; + + if (notify_hw) { + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, wqe, 0); + } +} + +static inline void mlx5e_tx_dma_unmap(struct device *pdev, + struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +static inline void mlx5e_dma_push(struct mlx5e_sq *sq, + dma_addr_t addr, + u32 size, + enum mlx5e_dma_map_type map_type) +{ + sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; + sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; + sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + sq->dma_fifo_pc++; +} + +static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) +{ + return &sq->dma_fifo[i & sq->dma_fifo_mask]; +} + +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb) +{ + int i; + + for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) { + struct mlx5e_sq_dma *last_pushed_dma = + mlx5e_dma_get(sq, --sq->dma_fifo_pc); + + mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma); + } +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int channel_ix = fallback(dev, skb); + int up = skb_vlan_tag_present(skb) ? + skb->vlan_tci >> VLAN_PRIO_SHIFT : + priv->default_vlan_prio; + int tc = netdev_get_prio_tc_map(dev, up); + + return priv->channeltc_to_txq_map[channel_ix][tc]; +} + +static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, + struct sk_buff *skb, bool bf) +{ + /* Some NIC TX decisions, e.g loopback, are based on the packet + * headers and occur before the data gather. + * Therefore these headers must be copied into the WQE + */ +#define MLX5E_MIN_INLINE ETH_HLEN + + if (bf) { + u16 ihs = skb_headlen(skb); + + if (skb_vlan_tag_present(skb)) + ihs += VLAN_HLEN; + + if (ihs <= sq->max_inline) + return skb_headlen(skb); + } + + return MLX5E_MIN_INLINE; +} + +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) +{ + struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; + int cpy1_sz = 2 * ETH_ALEN; + int cpy2_sz = ihs - cpy1_sz; + + skb_copy_from_linear_data(skb, vhdr, cpy1_sz); + skb_pull_inline(skb, cpy1_sz); + vhdr->h_vlan_proto = skb->vlan_proto; + vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); + skb_copy_from_linear_data(skb, &vhdr->h_vlan_encapsulated_proto, + cpy2_sz); + skb_pull_inline(skb, cpy2_sz); +} + +static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + u8 opcode = MLX5_OPCODE_SEND; + dma_addr_t dma_addr = 0; + bool bf = false; + u16 headlen; + u16 ds_cnt; + u16 ihs; + int i; + + memset(wqe, 0, sizeof(*wqe)); + + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + else + sq->stats.csum_offload_none++; + + if (sq->cc != sq->prev_cc) { + sq->prev_cc = sq->cc; + sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0; + } + + if (skb_is_gso(skb)) { + u32 payload_len; + + eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + opcode = MLX5_OPCODE_LSO; + ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + payload_len = skb->len - ihs; + MLX5E_TX_SKB_CB(skb)->num_bytes = skb->len + + (skb_shinfo(skb)->gso_segs - 1) * ihs; + sq->stats.tso_packets++; + sq->stats.tso_bytes += payload_len; + } else { + bf = sq->bf_budget && + !skb->xmit_more && + !skb_shinfo(skb)->nr_frags; + ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); + MLX5E_TX_SKB_CB(skb)->num_bytes = max_t(unsigned int, skb->len, + ETH_ZLEN); + } + + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs); + ihs += VLAN_HLEN; + } else { + skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs); + skb_pull_inline(skb, ihs); + } + + eseg->inline_hdr_sz = cpu_to_be16(ihs); + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start), + MLX5_SEND_WQE_DS); + dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; + + MLX5E_TX_SKB_CB(skb)->num_dma = 0; + + headlen = skb_headlen(skb); + if (headlen) { + dma_addr = dma_map_single(sq->pdev, skb->data, headlen, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(headlen); + + mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); + MLX5E_TX_SKB_CB(skb)->num_dma++; + + dseg++; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + int fsz = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + MLX5E_TX_SKB_CB(skb)->num_dma++; + + dseg++; + } + + ds_cnt += MLX5E_TX_SKB_CB(skb)->num_dma; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + + sq->skb[pi] = skb; + + MLX5E_TX_SKB_CB(skb)->num_wqebbs = DIV_ROUND_UP(ds_cnt, + MLX5_SEND_WQEBB_NUM_DS); + sq->pc += MLX5E_TX_SKB_CB(skb)->num_wqebbs; + + netdev_tx_sent_queue(sq->txq, MLX5E_TX_SKB_CB(skb)->num_bytes); + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { + netif_tx_stop_queue(sq->txq); + sq->stats.stopped++; + } + + if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { + int bf_sz = 0; + + if (bf && sq->uar_bf_map) + bf_sz = MLX5E_TX_SKB_CB(skb)->num_wqebbs << 3; + + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, wqe, bf_sz); + } + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((sq->pc & wq->sz_m1) > sq->edge) + mlx5e_send_nop(sq, false); + + sq->bf_budget = bf ? sq->bf_budget - 1 : 0; + + sq->stats.packets++; + return NETDEV_TX_OK; + +dma_unmap_wqe_err: + sq->stats.dropped++; + mlx5e_dma_unmap_wqe_err(sq, skb); + + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} + +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sq *sq = priv->txq_to_sq_map[skb_get_queue_mapping(skb)]; + + return mlx5e_sq_xmit(sq, skb); +} + +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u32 dma_fifo_cc; + u32 nbytes; + u16 npkts; + u16 sqcc; + int i; + + /* avoid accessing cq (dma coherent memory) if not needed */ + if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) + return false; + + sq = container_of(cq, struct mlx5e_sq, cq); + + npkts = 0; + nbytes = 0; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + /* avoid dirtying sq cache line every cqe */ + dma_fifo_cc = sq->dma_fifo_cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct sk_buff *skb; + u16 ci; + int j; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + skb = sq->skb[ci]; + + if (unlikely(!skb)) { /* nop */ + sq->stats.nop++; + sqcc++; + continue; + } + + for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + npkts++; + nbytes += MLX5E_TX_SKB_CB(skb)->num_bytes; + sqcc += MLX5E_TX_SKB_CB(skb)->num_wqebbs; + dev_kfree_skb(skb); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); + + if (netif_tx_queue_stopped(sq->txq) && + mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) && + likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) { + netif_tx_wake_queue(sq->txq); + sq->stats.wake++; + } + if (i == MLX5E_TX_CQ_POLL_BUDGET) { + set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); + return true; + } + + return false; +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c new file mode 100644 index 000000000..2c7cb6755 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) +{ + struct mlx5_cqwq *wq = &cq->wq; + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + int cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + int sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + + if (cqe_ownership_bit != sw_ownership_val) + return NULL; + + /* ensure cqe content is read after cqe ownership bit */ + rmb(); + + return cqe; +} + +int mlx5e_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, + napi); + bool busy = false; + int i; + + clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + + for (i = 0; i < c->num_tc; i++) + busy |= mlx5e_poll_tx_cq(&c->sq[i].cq); + + busy |= mlx5e_poll_rx_cq(&c->rq.cq, budget); + + busy |= mlx5e_post_rx_wqes(&c->rq); + + if (busy) + return budget; + + napi_complete(napi); + + /* avoid losing completion event during/after polling cqs */ + if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { + napi_schedule(napi); + return 0; + } + + for (i = 0; i < c->num_tc; i++) + mlx5e_cq_arm(&c->sq[i].cq); + mlx5e_cq_arm(&c->rq.cq); + + return 0; +} + +void mlx5e_completion_event(struct mlx5_core_cq *mcq) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + + set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); + barrier(); + napi_schedule(cq->napi); +} + +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + struct mlx5e_channel *c = cq->channel; + struct mlx5e_priv *priv = c->priv; + struct net_device *netdev = priv->netdev; + + netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", + __func__, mcq->cqn, event); +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 58800e4f3..713ead583 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -339,15 +339,15 @@ static void init_eq_buf(struct mlx5_eq *eq) int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; struct mlx5_create_eq_mbox_in *in; struct mlx5_create_eq_mbox_out out; int err; int inlen; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); - err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE, - &eq->buf); + eq->cons_index = 0; + err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; @@ -378,14 +378,15 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, goto err_in; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", + snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq->eqn = out.eq_number; - eq->irqn = vecidx; + eq->irqn = priv->msix_arr[vecidx].vector; eq->dev = dev; eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, - eq->name, eq); + err = request_irq(eq->irqn, mlx5_msix_handler, 0, + priv->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -401,7 +402,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, return 0; err_irq: - free_irq(table->msix_arr[vecidx].vector, eq); + free_irq(priv->msix_arr[vecidx].vector, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -417,16 +418,15 @@ EXPORT_SYMBOL_GPL(mlx5_create_map_eq); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { - struct mlx5_eq_table *table = &dev->priv.eq_table; int err; mlx5_debug_eq_remove(dev, eq); - free_irq(table->msix_arr[eq->irqn].vector, eq); + free_irq(eq->irqn, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - synchronize_irq(table->msix_arr[eq->irqn].vector); + synchronize_irq(eq->irqn); mlx5_buf_free(dev, &eq->buf); return err; @@ -456,7 +456,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u32 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, @@ -479,7 +479,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) err = mlx5_create_map_eq(dev, &table->pages_eq, MLX5_EQ_VEC_PAGES, - dev->caps.gen.max_vf + 1, + /* TODO: sriov max_vf + */ 1, 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", &dev->priv.uuari.uars[0]); if (err) { diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c new file mode 100644 index 000000000..ca90b9bc3 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/export.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/flow_table.h> +#include "mlx5_core.h" + +struct mlx5_ftg { + struct mlx5_flow_table_group g; + u32 id; + u32 start_ix; +}; + +struct mlx5_flow_table { + struct mlx5_core_dev *dev; + u8 level; + u8 type; + u32 id; + struct mutex mutex; /* sync bitmap alloc */ + u16 num_groups; + struct mlx5_ftg *group; + unsigned long *bitmap; + u32 size; +}; + +static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix, + u32 flow_index, void *flow_context) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + u32 *in; + void *in_flow_context; + int fcdls = + MLX5_GET(flow_context, flow_context, destination_list_size) * + MLX5_ST_SZ_BYTES(dest_format_struct); + int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, flow_index); + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + memcpy(in_flow_context, flow_context, + MLX5_ST_SZ_BYTES(flow_context) + fcdls); + + MLX5_SET(flow_context, in_flow_context, group_id, + ft->group[group_ix].id); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, + sizeof(out)); + kvfree(in); + + return err; +} + +static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; + u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v) + MLX5_SET_DFTEI(in, table_type, ft->type); + MLX5_SET_DFTEI(in, table_id, ft->id); + MLX5_SET_DFTEI(in, flow_index, flow_index); + MLX5_SET_DFTEI(in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v) + MLX5_SET_DFGI(in, table_type, ft->type); + MLX5_SET_DFGI(in, table_id, ft->id); + MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET_DFGI(in, group_id, ft->group[i].id); + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; + u32 *in; + void *in_match_criteria; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_group *g = &ft->group[i].g; + u32 start_ix = ft->group[i].start_ix; + u32 end_ix = start_ix + (1 << g->log_sz) - 1; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria); + + memset(out, 0, sizeof(out)); + +#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v) + MLX5_SET_CFGI(in, table_type, ft->type); + MLX5_SET_CFGI(in, table_id, ft->id); + MLX5_SET_CFGI(in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET_CFGI(in, start_flow_index, start_ix); + MLX5_SET_CFGI(in, end_flow_index, end_ix); + MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable); + + memcpy(in_match_criteria, g->match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, + sizeof(out)); + if (!err) + ft->group[i].id = MLX5_GET(create_flow_group_out, out, + group_id); + + kvfree(in); + + return err; +} + +static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft) +{ + int i; + + for (i = 0; i < ft->num_groups; i++) + mlx5_destroy_flow_group_cmd(ft, i); +} + +static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft) +{ + int err; + int i; + + for (i = 0; i < ft->num_groups; i++) { + err = mlx5_create_flow_group_cmd(ft, i); + if (err) + goto err_destroy_flow_table_groups; + } + + return 0; + +err_destroy_flow_table_groups: + for (i--; i >= 0; i--) + mlx5_destroy_flow_group_cmd(ft, i); + + return err; +} + +static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(create_flow_table_in, in, table_type, ft->type); + MLX5_SET(create_flow_table_in, in, level, ft->level); + MLX5_SET(create_flow_table_in, in, log_size, order_base_2(ft->size)); + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, + sizeof(out)); + if (err) + return err; + + ft->id = MLX5_GET(create_flow_table_out, out, table_id); + + return 0; +} + +static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v) + MLX5_SET_DFTI(in, table_type, ft->type); + MLX5_SET_DFTI(in, table_id, ft->id); + MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); + + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, + u32 *match_criteria, int *group_ix) +{ + void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + void *mc_misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters); + void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria, + inner_headers); + int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + int mc_misc_sz = MLX5_ST_SZ_BYTES(fte_match_set_misc); + int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + int i; + + for (i = 0; i < ft->num_groups; i++) { + struct mlx5_flow_table_group *g = &ft->group[i].g; + void *gmc_outer = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + outer_headers); + void *gmc_misc = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + misc_parameters); + void *gmc_inner = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + inner_headers); + + if (g->match_criteria_enable != match_criteria_enable) + continue; + + if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) + if (memcmp(mc_outer, gmc_outer, mc_outer_sz)) + continue; + + if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS) + if (memcmp(mc_misc, gmc_misc, mc_misc_sz)) + continue; + + if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS) + if (memcmp(mc_inner, gmc_inner, mc_inner_sz)) + continue; + + *group_ix = i; + return 0; + } + + return -EINVAL; +} + +static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix) +{ + struct mlx5_ftg *g = &ft->group[group_ix]; + int err = 0; + + mutex_lock(&ft->mutex); + + *ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix); + if (*ix >= (g->start_ix + (1 << g->g.log_sz))) + err = -ENOSPC; + else + __set_bit(*ix, ft->bitmap); + + mutex_unlock(&ft->mutex); + + return err; +} + +static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix) +{ + __clear_bit(ix, ft->bitmap); +} + +int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, + void *match_criteria, void *flow_context, + u32 *flow_index) +{ + struct mlx5_flow_table *ft = flow_table; + int group_ix; + int err; + + err = mlx5_find_group(ft, match_criteria_enable, match_criteria, + &group_ix); + if (err) { + mlx5_core_warn(ft->dev, "mlx5_find_group failed\n"); + return err; + } + + err = alloc_flow_index(ft, group_ix, flow_index); + if (err) { + mlx5_core_warn(ft->dev, "alloc_flow_index failed\n"); + return err; + } + + return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context); +} +EXPORT_SYMBOL(mlx5_add_flow_table_entry); + +void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index) +{ + struct mlx5_flow_table *ft = flow_table; + + mlx5_del_flow_entry_cmd(ft, flow_index); + mlx5_free_flow_index(ft, flow_index); +} +EXPORT_SYMBOL(mlx5_del_flow_table_entry); + +void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, + u16 num_groups, + struct mlx5_flow_table_group *group) +{ + struct mlx5_flow_table *ft; + u32 start_ix = 0; + u32 ft_size = 0; + void *gr; + void *bm; + int err; + int i; + + for (i = 0; i < num_groups; i++) + ft_size += (1 << group[i].log_sz); + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL); + bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL); + if (!ft || !gr || !bm) + goto err_free_ft; + + ft->group = gr; + ft->bitmap = bm; + ft->num_groups = num_groups; + ft->level = level; + ft->type = table_type; + ft->size = ft_size; + ft->dev = dev; + mutex_init(&ft->mutex); + + for (i = 0; i < ft->num_groups; i++) { + memcpy(&ft->group[i].g, &group[i], sizeof(*group)); + ft->group[i].start_ix = start_ix; + start_ix += 1 << group[i].log_sz; + } + + err = mlx5_create_flow_table_cmd(ft); + if (err) + goto err_free_ft; + + err = mlx5_create_flow_table_groups(ft); + if (err) + goto err_destroy_flow_table_cmd; + + return ft; + +err_destroy_flow_table_cmd: + mlx5_destroy_flow_table_cmd(ft); + +err_free_ft: + mlx5_core_warn(dev, "failed to alloc flow table\n"); + kfree(bm); + kfree(gr); + kfree(ft); + + return NULL; +} +EXPORT_SYMBOL(mlx5_create_flow_table); + +void mlx5_destroy_flow_table(void *flow_table) +{ + struct mlx5_flow_table *ft = flow_table; + + mlx5_destroy_flow_table_groups(ft); + mlx5_destroy_flow_table_cmd(ft); + kfree(ft->bitmap); + kfree(ft->group); + kfree(ft); +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +u32 mlx5_get_flow_table_id(void *flow_table) +{ + struct mlx5_flow_table *ft = flow_table; + + return ft->id; +} +EXPORT_SYMBOL(mlx5_get_flow_table_id); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 4b4cda3bc..9335e5ae1 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,79 +35,133 @@ #include <linux/module.h> #include "mlx5_core.h" -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev) +static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, + int outlen) { - struct mlx5_cmd_query_adapter_mbox_out *out; - struct mlx5_cmd_query_adapter_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_board_id(struct mlx5_core_dev *dev) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - memset(&in, 0, sizeof(in)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + err = mlx5_cmd_query_adapter(dev, out, outlen); if (err) - goto out_out; - - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); - goto out_out; - } + goto out; - memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid)); + memcpy(dev->board_id, + MLX5_ADDR_OF(query_adapter_out, out, + query_adapter_struct.vsd_contd_psid), + MLX5_FLD_SZ_BYTES(query_adapter_out, + query_adapter_struct.vsd_contd_psid)); -out_out: +out: kfree(out); - return err; } -int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps) +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id) { - return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR); + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_cmd_query_adapter(mdev, out, outlen); + if (err) + goto out; + + *vendor_id = MLX5_GET(query_adapter_out, out, + query_adapter_struct.ieee_vendor_id); +out: + kfree(out); + return err; } +EXPORT_SYMBOL(mlx5_core_query_vendor_id); -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps) +int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { - u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; - int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *out; int err; - if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) - return -ENOTSUPP; + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; - memset(in, 0, sizeof(in)); - out = kzalloc(out_sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); if (err) - goto out; + return err; - err = mlx5_cmd_status_to_err_v2(out); - if (err) { - mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err); - goto out; + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; } - memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct), - sizeof(*caps)); + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } - mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n", - be32_to_cpu(caps->per_transport_caps.rc_odp_caps), - be32_to_cpu(caps->per_transport_caps.uc_odp_caps), - be32_to_cpu(caps->per_transport_caps.ud_odp_caps)); + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } -out: - kfree(out); - return err; + if (MLX5_CAP_GEN(dev, roce)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } -EXPORT_SYMBOL(mlx5_query_odp_caps); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) { diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/health.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/health.c index 292d76f2a..f5deb642d 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -34,6 +34,7 @@ #include <linux/module.h> #include <linux/random.h> #include <linux/vmalloc.h> +#include <linux/hardirq.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -46,39 +47,113 @@ enum { enum { MLX5_HEALTH_SYNDR_FW_ERR = 0x1, MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, + MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8, MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, + MLX5_HEALTH_SYNDR_EQ_INV = 0xe, MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, + MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10 }; -static DEFINE_SPINLOCK(health_lock); -static LIST_HEAD(health_list); -static struct work_struct health_work; +enum { + MLX5_NIC_IFC_FULL = 0, + MLX5_NIC_IFC_DISABLED = 1, + MLX5_NIC_IFC_NO_DRAM_NIC = 2 +}; -static void health_care(struct work_struct *work) +static u8 get_nic_interface(struct mlx5_core_dev *dev) { - struct mlx5_core_health *health, *n; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - LIST_HEAD(tlist); + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; +} + +static void trigger_cmd_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; - spin_lock_irq(&health_lock); - list_splice_init(&health_list, &tlist); + /* wait for pending handlers to complete */ + synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + +static int in_fatal(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; - spin_unlock_irq(&health_lock); + if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED) + return 1; - list_for_each_entry_safe(health, n, &tlist, list) { - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - mlx5_core_warn(dev, "handling bad device here\n"); - /* nothing yet */ - spin_lock_irq(&health_lock); - list_del_init(&health->list); - spin_unlock_irq(&health_lock); + if (ioread32be(&h->fw_ver) == 0xffffffff) + return 1; + + return 0; +} + +void mlx5_enter_error_state(struct mlx5_core_dev *dev) +{ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return; + + mlx5_core_err(dev, "start\n"); + if (pci_channel_offline(dev->pdev) || in_fatal(dev)) + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + + mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); + mlx5_core_err(dev, "end\n"); +} + +static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) +{ + u8 nic_interface = get_nic_interface(dev); + + switch (nic_interface) { + case MLX5_NIC_IFC_FULL: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); + break; + + case MLX5_NIC_IFC_DISABLED: + mlx5_core_warn(dev, "starting teardown\n"); + break; + + case MLX5_NIC_IFC_NO_DRAM_NIC: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); + break; + default: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", + nic_interface); } + + mlx5_disable_device(dev); +} + +static void health_care(struct work_struct *work) +{ + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); } static const char *hsynd_str(u8 synd) @@ -88,6 +163,8 @@ static const char *hsynd_str(u8 synd) return "firmware internal error"; case MLX5_HEALTH_SYNDR_IRISC_ERR: return "irisc not responding"; + case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR: + return "unrecoverable hardware error"; case MLX5_HEALTH_SYNDR_CRC_ERR: return "firmware CRC error"; case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: @@ -98,48 +175,81 @@ static const char *hsynd_str(u8 synd) return "async EQ buffer overrun"; case MLX5_HEALTH_SYNDR_EQ_ERR: return "EQ error"; + case MLX5_HEALTH_SYNDR_EQ_INV: + return "Invalid EQ refrenced"; case MLX5_HEALTH_SYNDR_FFSER_ERR: return "FFSER error"; + case MLX5_HEALTH_SYNDR_HIGH_TEMP: + return "High temprature"; default: return "unrecognized error"; } } -static u16 read_be16(__be16 __iomem *p) +static u16 get_maj(u32 fw) { - return swab16(readl((__force u16 __iomem *) p)); + return fw >> 28; } -static u32 read_be32(__be32 __iomem *p) +static u16 get_min(u32 fw) { - return swab32(readl((__force u32 __iomem *) p)); + return fw >> 16 & 0xfff; +} + +static u16 get_sub(u32 fw) +{ + return fw & 0xffff; } static void print_health_info(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; + char fw_str[18]; + u32 fw; int i; + /* If the syndrom is 0, the device is OK and no need to print buffer */ + if (!ioread8(&h->synd)) + return; + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) - pr_info("assert_var[%d] 0x%08x\n", i, read_be32(h->assert_var + i)); + dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i)); + + dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); + dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); + fw = ioread32be(&h->fw_ver); + sprintf(fw_str, "%d.%d.%d", get_maj(fw), get_min(fw), get_sub(fw)); + dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str); + dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); + dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index)); + dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); + dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); +} + +static unsigned long get_next_poll_jiffies(void) +{ + unsigned long next; - pr_info("assert_exit_ptr 0x%08x\n", read_be32(&h->assert_exit_ptr)); - pr_info("assert_callra 0x%08x\n", read_be32(&h->assert_callra)); - pr_info("fw_ver 0x%08x\n", read_be32(&h->fw_ver)); - pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id)); - pr_info("irisc_index %d\n", readb(&h->irisc_index)); - pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd))); - pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync)); + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + + return next; } static void poll_health(unsigned long data) { struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; struct mlx5_core_health *health = &dev->priv.health; - unsigned long next; u32 count; + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + trigger_cmd_completions(dev); + mod_timer(&health->timer, get_next_poll_jiffies()); + return; + } + count = ioread32be(health->health_counter); if (count == health->prev) ++health->miss_counter; @@ -148,18 +258,16 @@ static void poll_health(unsigned long data) health->prev = count; if (health->miss_counter == MAX_MISSES) { - mlx5_core_err(dev, "device's health compromised\n"); + dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n"); print_health_info(dev); - spin_lock_irq(&health_lock); - list_add_tail(&health->list, &health_list); - spin_unlock_irq(&health_lock); - - queue_work(mlx5_core_wq, &health_work); } else { - get_random_bytes(&next, sizeof(next)); - next %= HZ; - next += jiffies + MLX5_HEALTH_POLL_INTERVAL; - mod_timer(&health->timer, next); + mod_timer(&health->timer, get_next_poll_jiffies()); + } + + if (in_fatal(dev) && !health->sick) { + health->sick = true; + print_health_info(dev); + queue_work(health->wq, &health->work); } } @@ -167,7 +275,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - INIT_LIST_HEAD(&health->list); init_timer(&health->timer); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -183,18 +290,33 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; del_timer_sync(&health->timer); - - spin_lock_irq(&health_lock); - if (!list_empty(&health->list)) - list_del_init(&health->list); - spin_unlock_irq(&health_lock); } -void mlx5_health_cleanup(void) +void mlx5_health_cleanup(struct mlx5_core_dev *dev) { + struct mlx5_core_health *health = &dev->priv.health; + + destroy_workqueue(health->wq); } -void __init mlx5_health_init(void) +int mlx5_health_init(struct mlx5_core_dev *dev) { - INIT_WORK(&health_work, health_care); + struct mlx5_core_health *health; + char *name; + + health = &dev->priv.health; + name = kmalloc(64, GFP_KERNEL); + if (!name) + return -ENOMEM; + + strcpy(name, "mlx5_health"); + strcat(name, dev_name(&dev->pdev->dev)); + health->wq = create_singlethread_workqueue(name); + kfree(name); + if (!health->wq) + return -ENOMEM; + + INIT_WORK(&health->work, health_care); + + return 0; } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mad.c index ee1b0b965..1368dac00 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -36,7 +36,7 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port) { struct mlx5_mad_ifc_mbox_in *in = NULL; diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/main.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28425e5ea..6cf6d93d8 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include <asm-generic/kmap_types.h> +#include <linux/highmem.h> #include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> @@ -38,19 +38,18 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/io-mapping.h> +#include <linux/interrupt.h> +#include <linux/delay.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/srq.h> #include <linux/debugfs.h> #include <linux/kmod.h> +#include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" -#define DRIVER_NAME "mlx5_core" -#define DRIVER_VERSION "3.0" -#define DRIVER_RELDATE "January 2015" - MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -65,7 +64,6 @@ static int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); -struct workqueue_struct *mlx5_core_wq; static LIST_HEAD(intf_list); static LIST_HEAD(dev_list); static DEFINE_MUTEX(intf_mutex); @@ -155,6 +153,25 @@ static struct mlx5_profile profile[] = { }, }; +#define FW_INIT_TIMEOUT_MILI 2000 +#define FW_INIT_WAIT_MS 2 + +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +{ + unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); + int err = 0; + + while (fw_initializing(dev)) { + if (time_after(jiffies, end)) { + err = -EBUSY; + break; + } + msleep(FW_INIT_WAIT_MS); + } + + return err; +} + static int set_dma_caps(struct pci_dev *pdev) { int err; @@ -185,6 +202,34 @@ static int set_dma_caps(struct pci_dev *pdev) return err; } +static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->pci_status = MLX5_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->pci_status_mutex); + + return err; +} + +static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->pci_status = MLX5_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->pci_status_mutex); +} + static int request_bar(struct pci_dev *pdev) { int err = 0; @@ -208,24 +253,28 @@ static void release_bar(struct pci_dev *pdev) static int mlx5_enable_msix(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - int num_eqs = 1 << dev->caps.gen.log_max_eq; + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = &priv->eq_table; + int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; int i; - nvec = dev->caps.gen.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL); - if (!table->msix_arr) - return -ENOMEM; + priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL); + + priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); + if (!priv->msix_arr || !priv->irq_info) + goto err_free_msix; for (i = 0; i < nvec; i++) - table->msix_arr[i].entry = i; + priv->msix_arr[i].entry = i; - nvec = pci_enable_msix_range(dev->pdev, table->msix_arr, + nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr, MLX5_EQ_VEC_COMP_BASE + 1, nvec); if (nvec < 0) return nvec; @@ -233,14 +282,20 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; return 0; + +err_free_msix: + kfree(priv->irq_info); + kfree(priv->msix_arr); + return -ENOMEM; } static void mlx5_disable_msix(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; pci_disable_msix(dev->pdev); - kfree(table->msix_arr); + kfree(priv->irq_info); + kfree(priv->msix_arr); } struct mlx5_reg_host_endianess { @@ -277,98 +332,20 @@ static u16 to_fw_pkey_sz(u32 size) } } -/* selectively copy writable fields clearing any reserved area - */ -static void copy_rw_fields(void *to, struct mlx5_caps *from) -{ - __be64 *flags_off = (__be64 *)MLX5_ADDR_OF(cmd_hca_cap, to, reserved_22); - u64 v64; - - MLX5_SET(cmd_hca_cap, to, log_max_qp, from->gen.log_max_qp); - MLX5_SET(cmd_hca_cap, to, log_max_ra_req_qp, from->gen.log_max_ra_req_qp); - MLX5_SET(cmd_hca_cap, to, log_max_ra_res_qp, from->gen.log_max_ra_res_qp); - MLX5_SET(cmd_hca_cap, to, pkey_table_size, from->gen.pkey_table_size); - MLX5_SET(cmd_hca_cap, to, pkey_table_size, to_fw_pkey_sz(from->gen.pkey_table_size)); - MLX5_SET(cmd_hca_cap, to, log_uar_page_sz, PAGE_SHIFT - 12); - v64 = from->gen.flags & MLX5_CAP_BITS_RW_MASK; - *flags_off = cpu_to_be64(v64); -} - -static u16 get_pkey_table_size(int pkey) -{ - if (pkey > MLX5_MAX_LOG_PKEY_TABLE) - return 0; - - return MLX5_MIN_PKEY_TABLE_SIZE << pkey; -} - -static void fw2drv_caps(struct mlx5_caps *caps, void *out) -{ - struct mlx5_general_caps *gen = &caps->gen; - - gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz); - gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz); - gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp); - gen->log_max_strq = MLX5_GET_PR(cmd_hca_cap, out, log_max_strq_sz); - gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srqs); - gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz); - gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq); - gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz); - gen->log_max_mkey = MLX5_GET_PR(cmd_hca_cap, out, log_max_mkey); - gen->log_max_eq = MLX5_GET_PR(cmd_hca_cap, out, log_max_eq); - gen->max_indirection = MLX5_GET_PR(cmd_hca_cap, out, max_indirection); - gen->log_max_mrw_sz = MLX5_GET_PR(cmd_hca_cap, out, log_max_mrw_sz); - gen->log_max_bsf_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_bsf_list_size); - gen->log_max_klm_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_klm_list_size); - gen->log_max_ra_req_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_dc); - gen->log_max_ra_res_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_dc); - gen->log_max_ra_req_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_qp); - gen->log_max_ra_res_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_qp); - gen->max_qp_counters = MLX5_GET_PR(cmd_hca_cap, out, max_qp_cnt); - gen->pkey_table_size = get_pkey_table_size(MLX5_GET_PR(cmd_hca_cap, out, pkey_table_size)); - gen->local_ca_ack_delay = MLX5_GET_PR(cmd_hca_cap, out, local_ca_ack_delay); - gen->num_ports = MLX5_GET_PR(cmd_hca_cap, out, num_ports); - gen->log_max_msg = MLX5_GET_PR(cmd_hca_cap, out, log_max_msg); - gen->stat_rate_support = MLX5_GET_PR(cmd_hca_cap, out, stat_rate_support); - gen->flags = be64_to_cpu(*(__be64 *)MLX5_ADDR_OF(cmd_hca_cap, out, reserved_22)); - pr_debug("flags = 0x%llx\n", gen->flags); - gen->uar_sz = MLX5_GET_PR(cmd_hca_cap, out, uar_sz); - gen->min_log_pg_sz = MLX5_GET_PR(cmd_hca_cap, out, log_pg_sz); - gen->bf_reg_size = MLX5_GET_PR(cmd_hca_cap, out, bf); - gen->bf_reg_size = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_bf_reg_size); - gen->max_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq); - gen->max_rq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_rq); - gen->max_dc_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq_dc); - gen->max_qp_mcg = MLX5_GET_PR(cmd_hca_cap, out, max_qp_mcg); - gen->log_max_pd = MLX5_GET_PR(cmd_hca_cap, out, log_max_pd); - gen->log_max_xrcd = MLX5_GET_PR(cmd_hca_cap, out, log_max_xrcd); - gen->log_uar_page_sz = MLX5_GET_PR(cmd_hca_cap, out, log_uar_page_sz); -} - -static const char *caps_opmod_str(u16 opmod) -{ - switch (opmod) { - case HCA_CAP_OPMOD_GET_MAX: - return "GET_MAX"; - case HCA_CAP_OPMOD_GET_CUR: - return "GET_CUR"; - default: - return "Invalid"; - } -} - -int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, - u16 opmod) +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *out; + void *out, *hca_caps; + u16 opmod = (cap_type << 1) | (cap_mode & 0x01); int err; memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); @@ -377,12 +354,30 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, err = mlx5_cmd_status_to_err_v2(out); if (err) { - mlx5_core_warn(dev, "query max hca cap failed, %d\n", err); + mlx5_core_warn(dev, + "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", + cap_type, cap_mode, err); goto query_ex; } - mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod)); - fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct)); + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(dev->hca_caps_max[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(dev->hca_caps_cur[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + default: + mlx5_core_warn(dev, + "Tried to query dev cap type(%x) with wrong opmode(%x)\n", + cap_type, cap_mode); + err = -EINVAL; + break; + } query_ex: kfree(out); return err; @@ -409,49 +404,47 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; struct mlx5_profile *prof = dev->profile; - struct mlx5_caps *cur_caps = NULL; - struct mlx5_caps *max_caps = NULL; int err = -ENOMEM; int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_hca_cap; set_ctx = kzalloc(set_sz, GFP_KERNEL); if (!set_ctx) goto query_ex; - max_caps = kzalloc(sizeof(*max_caps), GFP_KERNEL); - if (!max_caps) - goto query_ex; - - cur_caps = kzalloc(sizeof(*cur_caps), GFP_KERNEL); - if (!cur_caps) - goto query_ex; - - err = mlx5_core_get_caps(dev, max_caps, HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); if (err) goto query_ex; - err = mlx5_core_get_caps(dev, cur_caps, HCA_CAP_OPMOD_GET_CUR); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); if (err) goto query_ex; + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL], + MLX5_ST_SZ_BYTES(cmd_hca_cap)); + + mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", + mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + 128); /* we limit the size of the pkey table to 128 entries for now */ - cur_caps->gen.pkey_table_size = 128; + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(128)); if (prof->mask & MLX5_PROF_MASK_QP_SIZE) - cur_caps->gen.log_max_qp = prof->log_max_qp; + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); - /* disable checksum */ - cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + /* disable cmdif checksum */ + MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); - copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, hca_capability_struct), - cur_caps); err = set_caps(dev, set_ctx, set_sz); query_ex: - kfree(cur_caps); - kfree(max_caps); kfree(set_ctx); - return err; } @@ -507,7 +500,76 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) return 0; } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn) +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + int numa_node = priv->numa_node; + int err; + + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + priv->irq_info[i].mask); + + err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); + if (err) { + mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", + irq); + goto err_clear_mask; + } + + return 0; + +err_clear_mask: + free_cpumask_var(priv->irq_info[i].mask); + return err; +} + +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[i].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; @@ -549,7 +611,7 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) static int alloc_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_EQ_NAME]; + char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq *eq; int ncomp_vec; int nent; @@ -566,7 +628,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } - snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, name, &dev->priv.uuari.uars[0]); @@ -588,12 +650,197 @@ clean: return err; } -static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) +#ifdef CONFIG_MLX5_CORE_EN +static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { - struct mlx5_priv *priv = &dev->priv; + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]; + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)]; + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]; + u32 set_out[MLX5_ST_SZ_DW(set_issi_out)]; int err; + u32 sup_issi; + + memset(query_in, 0, sizeof(query_in)); + memset(query_out, 0, sizeof(query_out)); + + MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + + err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in), + query_out, sizeof(query_out)); + if (err) { + if (((struct mlx5_outbox_hdr *)query_out)->status == + MLX5_CMD_STAT_BAD_OP_ERR) { + pr_debug("Only ISSI 0 is supported\n"); + return 0; + } + + pr_err("failed to query ISSI\n"); + return err; + } + + sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); + + if (sup_issi & (1 << 1)) { + memset(set_in, 0, sizeof(set_in)); + memset(set_out, 0, sizeof(set_out)); + + MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + MLX5_SET(set_issi_in, set_in, current_issi, 1); + + err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in), + set_out, sizeof(set_out)); + if (err) { + pr_err("failed to set ISSI=1\n"); + return err; + } + + dev->issi = 1; + + return 0; + } else if (sup_issi & (1 << 0) || !sup_issi) { + return 0; + } + + return -ENOTSUPP; +} +#endif + +static int map_bf_area(struct mlx5_core_dev *dev) +{ + resource_size_t bf_start = pci_resource_start(dev->pdev, 0); + resource_size_t bf_len = pci_resource_len(dev->pdev, 0); + + dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len); + + return dev->priv.bf_mapping ? 0 : -ENOMEM; +} + +static void unmap_bf_area(struct mlx5_core_dev *dev) +{ + if (dev->priv.bf_mapping) + io_mapping_free(dev->priv.bf_mapping); +} + +static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(dev); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else { + kfree(dev_ctx); + } +} + +static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) { + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + intf->remove(dev, dev_ctx->context); + kfree(dev_ctx); + return; + } +} + +static int mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&intf_mutex); + list_add_tail(&priv->dev_list, &dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&intf_mutex); + + return 0; +} + +static void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&intf_mutex); + list_for_each_entry(priv, &dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + +static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; - dev->pdev = pdev; pci_set_drvdata(dev->pdev, dev); strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN); priv->name[MLX5_MAX_NAME_LEN - 1] = 0; @@ -602,11 +849,15 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) INIT_LIST_HEAD(&priv->pgdir_list); spin_lock_init(&priv->mkey_lock); + mutex_init(&priv->alloc_mutex); + + priv->numa_node = dev_to_node(&dev->pdev->dev); + priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); if (!priv->dbg_root) return -ENOMEM; - err = pci_enable_device(pdev); + err = mlx5_pci_enable_device(dev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n"); goto err_dbg; @@ -633,13 +884,61 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n"); goto err_clr_master; } + + return 0; + +err_clr_master: + pci_clear_master(dev->pdev); + release_bar(dev->pdev); +err_disable: + mlx5_pci_disable_device(dev); + +err_dbg: + debugfs_remove(priv->dbg_root); + return err; +} + +static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + iounmap(dev->iseg); + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + mlx5_pci_disable_device(dev); + debugfs_remove(priv->dbg_root); +} + +#define MLX5_IB_MOD "mlx5_ib" +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + mutex_lock(&dev->intf_state_mutex); + if (dev->interface_state == MLX5_INTERFACE_STATE_UP) { + dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", + __func__); + goto out; + } + dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); + /* on load removing any previous indication of internal error, device is + * up + */ + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_cmd_init(dev); if (err) { dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); - goto err_unmap; + goto out_err; + } + + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", + FW_INIT_TIMEOUT_MILI); + goto out_err; } mlx5_pagealloc_init(dev); @@ -650,6 +949,14 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_pagealloc_cleanup; } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_core_set_issi(dev); + if (err) { + dev_err(&pdev->dev, "failed to set issi\n"); + goto err_disable_hca; + } +#endif + err = mlx5_satisfy_startup_pages(dev, 1); if (err) { dev_err(&pdev->dev, "failed to allocate boot pages\n"); @@ -688,15 +995,15 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) mlx5_start_health_poll(dev); - err = mlx5_cmd_query_hca_cap(dev, &dev->caps); + err = mlx5_query_hca_caps(dev); if (err) { dev_err(&pdev->dev, "query hca failed\n"); goto err_stop_poll; } - err = mlx5_cmd_query_adapter(dev); + err = mlx5_query_board_id(dev); if (err) { - dev_err(&pdev->dev, "query adapter failed\n"); + dev_err(&pdev->dev, "query board id failed\n"); goto err_stop_poll; } @@ -730,6 +1037,15 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_stop_eqs; } + if (map_bf_area(dev)) + dev_err(&pdev->dev, "Failed to map blue flame area\n"); + + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); + goto err_unmap_bf_area; + } + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); mlx5_init_cq_table(dev); @@ -737,8 +1053,34 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto err_reg_dev; + } + + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); + + dev->interface_state = MLX5_INTERFACE_STATE_UP; +out: + mutex_unlock(&dev->intf_state_mutex); + return 0; +err_reg_dev: + mlx5_cleanup_mr_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_irq_clear_affinity_hints(dev); + +err_unmap_bf_area: + unmap_bf_area(dev); + + free_comp_eqs(dev); + err_stop_eqs: mlx5_stop_eqs(dev); @@ -755,7 +1097,7 @@ err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); - return err; + goto out_err; } err_pagealloc_stop: @@ -771,167 +1113,55 @@ err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); -err_unmap: - iounmap(dev->iseg); - -err_clr_master: - pci_clear_master(dev->pdev); - release_bar(dev->pdev); - -err_disable: - pci_disable_device(dev->pdev); +out_err: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&dev->intf_state_mutex); -err_dbg: - debugfs_remove(priv->dbg_root); return err; } -static void mlx5_dev_cleanup(struct mlx5_core_dev *dev) +static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { - struct mlx5_priv *priv = &dev->priv; + int err = 0; + mutex_lock(&dev->intf_state_mutex); + if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { + dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", + __func__); + goto out; + } + mlx5_unregister_device(dev); + mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); + mlx5_irq_clear_affinity_hints(dev); + unmap_bf_area(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); mlx5_stop_health_poll(dev); - if (mlx5_cmd_teardown_hca(dev)) { + err = mlx5_cmd_teardown_hca(dev); + if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); - return; + goto out; } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); - iounmap(dev->iseg); - pci_clear_master(dev->pdev); - release_bar(dev->pdev); - pci_disable_device(dev->pdev); - debugfs_remove(priv->dbg_root); -} - -static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); - if (!dev_ctx) { - pr_warn("mlx5_add_device: alloc context failed\n"); - return; - } - - dev_ctx->intf = intf; - dev_ctx->context = intf->add(dev); - - if (dev_ctx->context) { - spin_lock_irq(&priv->ctx_lock); - list_add_tail(&dev_ctx->list, &priv->ctx_list); - spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - } -} - -static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) { - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); - - intf->remove(dev, dev_ctx->context); - kfree(dev_ctx); - return; - } -} -static int mlx5_register_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&intf_mutex); - list_add_tail(&priv->dev_list, &dev_list); - list_for_each_entry(intf, &intf_list, list) - mlx5_add_device(intf, priv); - mutex_unlock(&intf_mutex); - return 0; -} -static void mlx5_unregister_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_remove_device(intf, priv); - list_del(&priv->dev_list); - mutex_unlock(&intf_mutex); -} - -int mlx5_register_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - if (!intf->add || !intf->remove) - return -EINVAL; - - mutex_lock(&intf_mutex); - list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) - mlx5_add_device(intf, priv); - mutex_unlock(&intf_mutex); - - return 0; -} -EXPORT_SYMBOL(mlx5_register_interface); - -void mlx5_unregister_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - mutex_lock(&intf_mutex); - list_for_each_entry(priv, &dev_list, dev_list) - mlx5_remove_device(intf, priv); - list_del(&intf->list); - mutex_unlock(&intf_mutex); -} -EXPORT_SYMBOL(mlx5_unregister_interface); - -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; +out: + dev->interface_state = MLX5_INTERFACE_STATE_DOWN; + mutex_unlock(&dev->intf_state_mutex); + return err; } -EXPORT_SYMBOL(mlx5_get_protocol_dev); -static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param) { struct mlx5_priv *priv = &dev->priv; struct mlx5_device_context *dev_ctx; @@ -952,7 +1182,6 @@ struct mlx5_core_event_handler { void *data); }; -#define MLX5_IB_MOD "mlx5_ib" static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) @@ -976,43 +1205,166 @@ static int init_one(struct pci_dev *pdev, prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profile[prof_sel]; + dev->pdev = pdev; dev->event = mlx5_core_event; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); - err = mlx5_dev_init(dev, pdev); + mutex_init(&dev->pci_status_mutex); + mutex_init(&dev->intf_state_mutex); + err = mlx5_pci_init(dev, priv); if (err) { - dev_err(&pdev->dev, "mlx5_dev_init failed %d\n", err); - goto out; + dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); + goto clean_dev; } - err = mlx5_register_device(dev); + err = mlx5_health_init(dev); if (err) { - dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); - goto out_init; + dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err); + goto close_pci; } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); + err = mlx5_load_one(dev, priv); + if (err) { + dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); + goto clean_health; + } return 0; -out_init: - mlx5_dev_cleanup(dev); -out: +clean_health: + mlx5_health_cleanup(dev); +close_pci: + mlx5_pci_close(dev, priv); +clean_dev: + pci_set_drvdata(pdev, NULL); kfree(dev); + return err; } + static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; - mlx5_unregister_device(dev); - mlx5_dev_cleanup(dev); + if (mlx5_unload_one(dev, priv)) { + dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); + mlx5_health_cleanup(dev); + return; + } + mlx5_health_cleanup(dev); + mlx5_pci_close(dev, priv); + pci_set_drvdata(pdev, NULL); kfree(dev); } +static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + + dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_enter_error_state(dev); + mlx5_unload_one(dev, priv); + mlx5_pci_disable_device(dev); + return state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err = 0; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + err = mlx5_pci_enable_device(dev); + if (err) { + dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" + , __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + pci_set_master(pdev); + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; +} + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +/* wait for the device to show vital signs. For now we check + * that we can read the device ID and that the health buffer + * shows a non zero value which is different than 0xffffffff + */ +static void wait_vital(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_health *health = &dev->priv.health; + const int niter = 100; + u32 count; + u16 did; + int i; + + /* Wait for firmware to be ready after reset */ + msleep(1000); + for (i = 0; i < niter; i++) { + if (pci_read_config_word(pdev, 2, &did)) { + dev_warn(&pdev->dev, "failed reading config word\n"); + break; + } + if (did == pdev->device) { + dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i); + break; + } + msleep(50); + } + if (i == niter) + dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); + + for (i = 0; i < niter; i++) { + count = ioread32be(health->health_counter); + if (count && count != 0xffffffff) { + dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + break; + } + msleep(50); + } + + if (i == niter) + dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + + pci_save_state(pdev); + wait_vital(pdev); + + err = mlx5_load_one(dev, priv); + if (err) + dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" + , __func__, err); + else + dev_info(&pdev->dev, "%s: device recovered\n", __func__); +} + +static const struct pci_error_handlers mlx5_err_handler = { + .error_detected = mlx5_pci_err_detected, + .slot_reset = mlx5_pci_slot_reset, + .resume = mlx5_pci_resume +}; + static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ @@ -1029,7 +1381,8 @@ static struct pci_driver mlx5_core_driver = { .name = DRIVER_NAME, .id_table = mlx5_core_pci_table, .probe = init_one, - .remove = remove_one + .remove = remove_one, + .err_handler = &mlx5_err_handler }; static int __init init(void) @@ -1037,22 +1390,17 @@ static int __init init(void) int err; mlx5_register_debugfs(); - mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq"); - if (!mlx5_core_wq) { - err = -ENOMEM; - goto err_debug; - } - mlx5_health_init(); err = pci_register_driver(&mlx5_core_driver); if (err) - goto err_health; + goto err_debug; + +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_init(); +#endif return 0; -err_health: - mlx5_health_cleanup(); - destroy_workqueue(mlx5_core_wq); err_debug: mlx5_unregister_debugfs(); return err; @@ -1060,9 +1408,10 @@ err_debug: static void __exit cleanup(void) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_cleanup(); +#endif pci_unregister_driver(&mlx5_core_driver); - mlx5_health_cleanup(); - destroy_workqueue(mlx5_core_wq); mlx5_unregister_debugfs(); } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index d79fd85d1..d5a0c2d61 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -91,7 +91,7 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG); memcpy(in.gid, mgid, sizeof(*mgid)); in.qpn = cpu_to_be32(qpn); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a051b906a..cee5b7a83 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,27 +37,31 @@ #include <linux/kernel.h> #include <linux/sched.h> +#define DRIVER_NAME "mlx5_core" +#define DRIVER_VERSION "3.0-1" +#define DRIVER_RELDATE "January 2015" + extern int mlx5_core_debug_mask; -#define mlx5_core_dbg(dev, format, ...) \ - pr_debug("%s:%s:%d:(pid %d): " format, \ - (dev)->priv.name, __func__, __LINE__, current->pid, \ +#define mlx5_core_dbg(__dev, format, ...) \ + dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ + (__dev)->priv.name, __func__, __LINE__, current->pid, \ ##__VA_ARGS__) -#define mlx5_core_dbg_mask(dev, mask, format, ...) \ +#define mlx5_core_dbg_mask(__dev, mask, format, ...) \ do { \ if ((mask) & mlx5_core_debug_mask) \ - mlx5_core_dbg(dev, format, ##__VA_ARGS__); \ + mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \ } while (0) -#define mlx5_core_err(dev, format, ...) \ - pr_err("%s:%s:%d:(pid %d): " format, \ - (dev)->priv.name, __func__, __LINE__, current->pid, \ +#define mlx5_core_err(__dev, format, ...) \ + dev_err(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ + (__dev)->priv.name, __func__, __LINE__, current->pid, \ ##__VA_ARGS__) -#define mlx5_core_warn(dev, format, ...) \ - pr_warn("%s:%s:%d:(pid %d): " format, \ - (dev)->priv.name, __func__, __LINE__, current->pid, \ +#define mlx5_core_warn(__dev, format, ...) \ + dev_warn(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ + (__dev)->priv.name, __func__, __LINE__, current->pid, \ ##__VA_ARGS__) enum { @@ -65,11 +69,29 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; +static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, + int in_size, u32 *out, + int out_size) +{ + int err; + + err = mlx5_cmd_exec(dev, in, in_size, out, out_size); + if (err) + return err; -int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, - struct mlx5_caps *caps); -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); + return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out); +} + +int mlx5_query_hca_caps(struct mlx5_core_dev *dev); +int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param); +void mlx5_enter_error_state(struct mlx5_core_dev *dev); +void mlx5_disable_device(struct mlx5_core_dev *dev); + +void mlx5e_init(void); +void mlx5e_cleanup(void); #endif /* __MLX5_CORE_H__ */ diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 1adb300dd..6fa22b51e 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -40,6 +40,7 @@ void mlx5_init_mr_table(struct mlx5_core_dev *dev) { struct mlx5_mr_table *table = &dev->priv.mr_table; + memset(table, 0, sizeof(*table)); rwlock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 8a64542ab..4d3377b12 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include <asm-generic/kmap_types.h> +#include <linux/highmem.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mlx5/driver.h> @@ -275,12 +275,36 @@ out_alloc: return err; } + +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +{ + struct mlx5_manage_pages_inbox *in; + struct mlx5_manage_pages_outbox out; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return; + + memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); + in->func_id = cpu_to_be16(func_id); + err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + if (!err) + err = mlx5_cmd_status_to_err(&out.hdr); + + if (err) + mlx5_core_warn(dev, "page notify failed\n"); + + kfree(in); +} + static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int notify_fail) { struct mlx5_manage_pages_inbox *in; struct mlx5_manage_pages_outbox out; - struct mlx5_manage_pages_inbox *nin; int inlen; u64 addr; int err; @@ -289,8 +313,9 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, inlen = sizeof(*in) + npages * sizeof(in->pas[0]); in = mlx5_vzalloc(inlen); if (!in) { + err = -ENOMEM; mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); - return -ENOMEM; + goto out_free; } memset(&out, 0, sizeof(out)); @@ -316,43 +341,29 @@ retry: if (err) { mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); - goto out_alloc; + goto out_4k; } dev->priv.fw_pages += npages; - if (out.hdr.status) { - err = mlx5_cmd_status_to_err(&out.hdr); - if (err) { - mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", - func_id, npages, out.hdr.status); - goto out_alloc; - } + err = mlx5_cmd_status_to_err(&out.hdr); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", + func_id, npages, out.hdr.status); + goto out_4k; } mlx5_core_dbg(dev, "err %d\n", err); - goto out_free; - -out_alloc: - if (notify_fail) { - nin = kzalloc(sizeof(*nin), GFP_KERNEL); - if (!nin) { - mlx5_core_warn(dev, "allocation failed\n"); - goto out_4k; - } - memset(&out, 0, sizeof(out)); - nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); - if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out))) - mlx5_core_warn(dev, "page notify failed\n"); - kfree(nin); - } + kvfree(in); + return 0; out_4k: for (i--; i >= 0; i--) free_4k(dev, be64_to_cpu(in->pas[i])); out_free: kvfree(in); + if (notify_fail) + page_notify_fail(dev, func_id); return err; } @@ -482,15 +493,20 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) struct fw_page *fwp; struct rb_node *p; int nclaimed = 0; - int err; + int err = 0; do { p = rb_first(&dev->priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - err = reclaim_pages(dev, fwp->func_id, - optimal_reclaimed_pages(), - &nclaimed); + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + free_4k(dev, fwp->addr); + nclaimed = 1; + } else { + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); + } if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/port.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/port.c index 49e90f261..a87e773e9 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -90,15 +90,276 @@ int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) { struct mlx5_reg_pcap in; struct mlx5_reg_pcap out; - int err; memset(&in, 0, sizeof(in)); in.caps_127_96 = cpu_to_be32(caps); in.port_num = port_num; - err = mlx5_core_access_reg(dev, &in, sizeof(in), &out, - sizeof(out), MLX5_REG_PCAP, 0, 1); - - return err; + return mlx5_core_access_reg(dev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_PCAP, 0, 1); } EXPORT_SYMBOL_GPL(mlx5_set_port_caps); + +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(ptys_reg, in, local_port, local_port); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + + return mlx5_core_access_reg(dev, in, sizeof(in), ptys, + ptys_size, MLX5_REG_PTYS, 0, 0); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); + +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + else + *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); + +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + else + *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); + +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port); + if (err) + return err; + + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); + +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + else + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); + +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + if (proto_mask == MLX5_PTYS_EN) + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_proto); + +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(paos_reg, in, local_port, 1); + MLX5_SET(paos_reg, in, admin_status, status); + MLX5_SET(paos_reg, in, ase, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); + +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(paos_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 0); + if (err) + return err; + + *status = MLX5_GET(paos_reg, out, admin_status); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); + +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, + int *max_mtu, int *oper_mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmtu_reg, in, local_port, port); + + mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); + + if (max_mtu) + *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); + if (oper_mtu) + *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); + if (admin_mtu) + *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); +} + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmtu_reg, in, admin_mtu, mtu); + MLX5_SET(pmtu_reg, in, local_port, port); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); + +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); + +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); + +static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, + int pvlc_size, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(pvlc_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pvlc_reg, in, local_port, local_port); + + return mlx5_core_access_reg(dev, in, sizeof(in), pvlc, + pvlc_size, MLX5_REG_PVLC, 0, 0); +} + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port); + if (err) + return err; + + *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); + +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx, tx_pause); + MLX5_SET(pfcc_reg, in, pprx, rx_pause); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pause); + +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 0); + if (err) + return err; + + if (rx_pause) + *rx_pause = MLX5_GET(pfcc_reg, out, pprx); + + if (tx_pause) + *tx_pause = MLX5_GET(pfcc_reg, out, pptx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pause); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/qp.c index dc7dbf7e9..30e2ba3f5 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -187,10 +187,17 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_destroy_qp_mbox_in din; struct mlx5_destroy_qp_mbox_out dout; int err; + void *qpc; memset(&out, 0, sizeof(out)); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); + if (dev->issi) { + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + /* 0xffffff means we ask to work with cqe version 0 */ + MLX5_SET(qpc, qpc, user_index, 0xffffff); + } + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); if (err) { mlx5_core_warn(dev, "ret %d\n", err); @@ -338,6 +345,7 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) { struct mlx5_qp_table *table = &dev->priv.qp_table; + memset(table, 0, sizeof(*table)); spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/srq.c index f9d25dcd0..ffada8019 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -37,6 +37,7 @@ #include <linux/mlx5/srq.h> #include <rdma/ib_verbs.h> #include "mlx5_core.h" +#include "transobj.h" void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) { @@ -62,6 +63,74 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } +static int get_pas_size(void *srqc) +{ + u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; + u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); + u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); + u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; + + return rq_num_pas * sizeof(u64); +} + +static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +{ + void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + if (srqc_to_rmpc) { + switch (MLX5_GET(srqc, srqc, state)) { + case MLX5_SRQC_STATE_GOOD: + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + break; + case MLX5_SRQC_STATE_ERROR: + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); + break; + default: + pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, + __LINE__, MLX5_GET(srqc, srqc, state)); + MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); + } + + MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); + MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); + MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); + MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); + MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); + MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); + MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); + MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); + } else { + switch (MLX5_GET(rmpc, rmpc, state)) { + case MLX5_RMPC_STATE_RDY: + MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); + break; + case MLX5_RMPC_STATE_ERR: + MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); + break; + default: + pr_warn("%s: %d: Unknown rmp state = 0x%x\n", + __func__, __LINE__, + MLX5_GET(rmpc, rmpc, state)); + MLX5_SET(srqc, srqc, state, + MLX5_GET(rmpc, rmpc, state)); + } + + MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); + MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); + MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); + MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); + MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); + MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); + MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); + MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); + } +} + struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) { struct mlx5_srq_table *table = &dev->priv.srq_table; @@ -79,26 +148,311 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) } EXPORT_SYMBOL(mlx5_core_get_srq); -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) +static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen) { struct mlx5_create_srq_mbox_out out; - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_destroy_srq_mbox_in din; - struct mlx5_destroy_srq_mbox_out dout; int err; memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); - if (err) - return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); + err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), + sizeof(out)); srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + return err; +} + +static int destroy_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + struct mlx5_destroy_srq_mbox_in in; + struct mlx5_destroy_srq_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), + (u32 *)(&out), sizeof(out)); +} + +static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + struct mlx5_arm_srq_mbox_in in; + struct mlx5_arm_srq_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); + in.hdr.opmod = cpu_to_be16(!!is_srq); + in.srqn = cpu_to_be32(srq->srqn); + in.lwm = cpu_to_be16(lwm); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), + sizeof(in), (u32 *)(&out), + sizeof(out)); +} + +static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + struct mlx5_query_srq_mbox_in in; + + memset(&in, 0, sizeof(in)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), + (u32 *)out, sizeof(*out)); +} + +static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, + int srq_inlen) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *srqc; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); + pas_size = get_pas_size(srqc); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + memcpy(pas, in->pas, pas_size); + /* 0xffffff means we ask to work with cqe version 0 */ + MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; + + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; + + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + + return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *srqc; + void *xrc_srqc; + int err; + + xrcsrq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int srq_inlen) +{ + void *create_in; + void *rmpc; + void *srqc; + int pas_size; + int inlen; + int err; + + srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); + pas_size = get_pas_size(srqc); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + rmpc_srqc_reformat(srqc, rmpc, true); + + err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + + kvfree(create_in); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + return mlx5_core_destroy_rmp(dev, srq->srqn); +} + +static int arm_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + return err; +} + +static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + u32 *rmp_out; + void *rmpc; + void *srqc; + int err; + + rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); + if (!rmp_out) + return -ENOMEM; + + err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + rmpc_srqc_reformat(srqc, rmpc, false); + +out: + kvfree(rmp_out); + return err; +} + +static int create_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, + int inlen, int is_xrc) +{ + if (!dev->issi) + return create_srq_cmd(dev, srq, in, inlen); + else if (srq->common.res == MLX5_RES_XSRQ) + return create_xrc_srq_cmd(dev, srq, in, inlen); + else + return create_rmp_cmd(dev, srq, in, inlen); +} + +static int destroy_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + if (!dev->issi) + return destroy_srq_cmd(dev, srq); + else if (srq->common.res == MLX5_RES_XSRQ) + return destroy_xrc_srq_cmd(dev, srq); + else + return destroy_rmp_cmd(dev, srq); +} + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen, + int is_xrc) +{ + int err; + struct mlx5_srq_table *table = &dev->priv.srq_table; + + srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; + + err = create_srq_split(dev, srq, in, inlen, is_xrc); + if (err) + return err; + atomic_set(&srq->refcount, 1); init_completion(&srq->free); @@ -107,25 +461,20 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, spin_unlock_irq(&table->lock); if (err) { mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); - goto err_cmd; + goto err_destroy_srq_split; } return 0; -err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.srqn = cpu_to_be32(srq->srqn); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); +err_destroy_srq_split: + destroy_srq_split(dev, srq); + return err; } EXPORT_SYMBOL(mlx5_core_create_srq); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; struct mlx5_srq_table *table = &dev->priv.srq_table; struct mlx5_core_srq *tmp; int err; @@ -142,17 +491,10 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) return -EINVAL; } - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + err = destroy_srq_split(dev, srq); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); wait_for_completion(&srq->free); @@ -164,48 +506,24 @@ EXPORT_SYMBOL(mlx5_core_destroy_srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out) { - struct mlx5_query_srq_mbox_in in; - int err; - - memset(&in, 0, sizeof(in)); - memset(out, 0, sizeof(*out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + if (!dev->issi) + return query_srq_cmd(dev, srq, out); + else if (srq->common.res == MLX5_RES_XSRQ) + return query_xrc_srq_cmd(dev, srq, out); + else + return query_rmp_cmd(dev, srq, out); } EXPORT_SYMBOL(mlx5_core_query_srq); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); - - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + if (!dev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + else if (srq->common.res == MLX5_RES_XSRQ) + return arm_xrc_srq_cmd(dev, srq, lwm); + else + return arm_rmp_cmd(dev, srq, lwm); } EXPORT_SYMBOL(mlx5_core_arm_srq); @@ -213,6 +531,7 @@ void mlx5_init_srq_table(struct mlx5_core_dev *dev) { struct mlx5_srq_table *table = &dev->priv.srq_table; + memset(table, 0, sizeof(*table)); spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/transobj.c new file mode 100644 index 000000000..d7068f54e --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "transobj.h" + +int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) +{ + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + + return err; +} + +void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) +{ + u32 out[MLX5_ST_SZ_DW(create_rq_out)]; + int err; + + MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqn = MLX5_GET(create_rq_out, out, rqn); + + return err; +} + +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rq_out)]; + + MLX5_SET(modify_rq_in, in, rqn, rqn); + MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) +{ + u32 out[MLX5_ST_SZ_DW(create_sq_out)]; + int err; + + MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *sqn = MLX5_GET(create_sq_out, out, sqn); + + return err; +} + +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_sq_out)]; + + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)]; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} + +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_tir_out)]; + + MLX5_SET(modify_tir_in, in, tirn, tirn); + MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)]; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} + +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn) +{ + u32 out[MLX5_ST_SZ_DW(create_rmp_out)]; + int err; + + MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rmpn = MLX5_GET(create_rmp_out, out, rmpn); + + return err; +} + +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)]; + + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rmp_in)]; + int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + + memset(in, 0, sizeof(in)); + MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, in, rmpn, rmpn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} + +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, rmpn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + + return err; +} + +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *xsrqn) +{ + u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + int err; + + MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + + return err; +} + +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + void *srqc; + void *xrc_srqc; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (!err) { + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out, + xrc_srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + } + + return err; +} + +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) +{ + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; + u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); + MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, in, op_mod, + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)]; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} + +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)]; + + MLX5_SET(modify_rqt_in, in, rqtn, rqtn); + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/kernel/drivers/net/ethernet/mellanox/mlx5/core/transobj.h new file mode 100644 index 000000000..74cae5143 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __TRANSOBJ_H__ +#define __TRANSOBJ_H__ + +int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); +void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqn); +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *sqn); +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tirn); +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen); +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tisn); +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn); +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen); +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); + +#endif /* __TRANSOBJ_H__ */ diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 5a89bb1d6..eb05c845e 100644 --- a/kernel/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -32,6 +32,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/io-mapping.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -175,12 +176,13 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) for (i = 0; i < tot_uuars; i++) { bf = &uuari->bfs[i]; - bf->buf_size = dev->caps.gen.bf_reg_size / 2; + bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2; bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; bf->reg = NULL; /* Add WC support */ - bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.gen.bf_reg_size + - MLX5_BF_OFFSET; + bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * + (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) + + MLX5_BF_OFFSET; bf->need_lock = need_uuar_lock(i); spin_lock_init(&bf->lock); spin_lock_init(&bf->lock32); @@ -223,3 +225,45 @@ int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) return 0; } + +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +{ + phys_addr_t pfn; + phys_addr_t uar_bar_start; + int err; + + err = mlx5_cmd_alloc_uar(mdev, &uar->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + return err; + } + + uar_bar_start = pci_resource_start(mdev->pdev, 0); + pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) { + mlx5_core_warn(mdev, "ioremap() failed, %d\n", err); + err = -ENOMEM; + goto err_free_uar; + } + + if (mdev->priv.bf_mapping) + uar->bf_map = io_mapping_map_wc(mdev->priv.bf_mapping, + uar->index << PAGE_SHIFT); + + return 0; + +err_free_uar: + mlx5_cmd_free_uar(mdev, uar->index); + + return err; +} +EXPORT_SYMBOL(mlx5_alloc_map_uar); + +void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +{ + io_mapping_unmap(uar->bf_map); + iounmap(uar->map); + mlx5_cmd_free_uar(mdev, uar->index); +} +EXPORT_SYMBOL(mlx5_unmap_free_uar); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/vport.c new file mode 100644 index 000000000..b94177ebc --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/export.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include "mlx5_core.h" + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +{ + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + + return MLX5_GET(query_vport_state_out, out, state); +} +EXPORT_SYMBOL(mlx5_query_vport_state); + +void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u8 *out_addr; + + out = mlx5_vzalloc(outlen); + if (!out) + return; + + out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context.permanent_address); + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + memset(out, 0, outlen); + mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + + ether_addr_copy(addr, &out_addr[2]); + + kvfree(out); +} +EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); + +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + union ib_gid *tmp; + int tbsz; + int nout; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size)); + mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n", + vf_num, gid_index, tbsz); + + if (gid_index > tbsz && gid_index != 0xffff) + return -EINVAL; + + if (gid_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * sizeof(*gid); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto out; + + tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + gid->global.subnet_prefix = tmp->global.subnet_prefix; + gid->global.interface_id = tmp->global.interface_id; + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); + +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + void *pkarr; + int nout; + int tbsz; + int err; + int i; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)); + if (pkey_index > tbsz && pkey_index != 0xffff) + return -EINVAL; + + if (pkey_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * MLX5_ST_SZ_BYTES(pkey); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto out; + + pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); + for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) + *pkey = MLX5_GET_PR(pkey, pkarr, pkey); + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); + +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)]; + int is_group_manager; + void *out; + void *ctx; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT); + + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + + ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context); + rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select); + rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware); + rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi); + rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw); + rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy); + rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state); + rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid); + rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid); + rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1); + rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask1_field_select); + rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2); + rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask2_field_select); + rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid); + rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx, + init_type_reply); + rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc); + rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx, + subnet_timeout); + rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid); + rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl); + rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + qkey_violation_counter); + rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + pkey_violation_counter); + rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required); + rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx, + system_image_guid); + +ex: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); + +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + u64 *sys_image_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *sys_image_guid = rep->sys_image_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); + +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *node_guid = rep->node_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/kernel/drivers/net/ethernet/mellanox/mlx5/core/wq.c new file mode 100644 index 000000000..ce21ee5b2 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include "wq.h" +#include "mlx5_core.h" + +u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) +{ + return (u32)wq->sz_m1 + 1; +} + +u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) +{ + return wq->sz_m1 + 1; +} + +u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) +{ + return (u32)wq->sz_m1 + 1; +} + +static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_get_size(wq) << wq->log_stride; +} + +static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_get_size(wq) << wq->log_stride; +} + +static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq) +{ + return mlx5_wq_ll_get_size(wq) << wq->log_stride; +} + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + int err; + + wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride); + wq->sz_m1 = (1 << MLX5_GET(wq, wqc, log_wq_sz)) - 1; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + return err; + } + + err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + goto err_db_free; + } + + wq->buf = wq_ctrl->buf.direct.buf; + wq->db = wq_ctrl->db.db; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + int err; + + wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz); + wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size); + wq->sz_m1 = (1 << wq->log_sz) - 1; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + return err; + } + + err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + goto err_db_free; + } + + wq->buf = wq_ctrl->buf.direct.buf; + wq->db = wq_ctrl->db.db; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int err; + int i; + + wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride); + wq->sz_m1 = (1 << MLX5_GET(wq, wqc, log_wq_sz)) - 1; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + return err; + } + + err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf); + if (err) { + mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + goto err_db_free; + } + + wq->buf = wq_ctrl->buf.direct.buf; + wq->db = wq_ctrl->db.db; + + for (i = 0; i < wq->sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) +{ + mlx5_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); + mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db); +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/kernel/drivers/net/ethernet/mellanox/mlx5/core/wq.h new file mode 100644 index 000000000..6c2a8f950 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_WQ_H__ +#define __MLX5_WQ_H__ + +#include <linux/mlx5/mlx5_ifc.h> + +struct mlx5_wq_param { + int linear; + int buf_numa_node; + int db_numa_node; +}; + +struct mlx5_wq_ctrl { + struct mlx5_core_dev *mdev; + struct mlx5_buf buf; + struct mlx5_db db; +}; + +struct mlx5_wq_cyc { + void *buf; + __be32 *db; + u16 sz_m1; + u8 log_stride; +}; + +struct mlx5_cqwq { + void *buf; + __be32 *db; + u32 sz_m1; + u32 cc; /* consumer counter */ + u8 log_sz; + u8 log_stride; +}; + +struct mlx5_wq_ll { + void *buf; + __be32 *db; + __be16 *tail_next; + u16 sz_m1; + u16 head; + u16 wqe_ctr; + u16 cur_sz; + u8 log_stride; +}; + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl); +u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl); +u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq); + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl); +u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq); + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); + +static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) +{ + return ctr & wq->sz_m1; +} + +static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) +{ + return wq->buf + (ix << wq->log_stride); +} + +static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) +{ + int equal = (cc1 == cc2); + int smaller = 0x8000 & (cc1 - cc2); + + return !equal && !smaller; +} + +static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) +{ + return wq->cc & wq->sz_m1; +} + +static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +{ + return wq->buf + (ix << wq->log_stride); +} + +static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq) +{ + return wq->cc >> wq->log_sz; +} + +static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq) +{ + wq->cc++; +} + +static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq) +{ + *wq->db = cpu_to_be32(wq->cc & 0xffffff); +} + +static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) +{ + return wq->cur_sz == wq->sz_m1; +} + +static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq) +{ + return !wq->cur_sz; +} + +static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix) +{ + return wq->buf + (ix << wq->log_stride); +} + +static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next) +{ + wq->head = head_next; + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix, + __be16 *next_tail_next) +{ + *wq->tail_next = ix; + wq->tail_next = next_tail_next; + wq->cur_sz--; +} + +static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +#endif /* __MLX5_WQ_H__ */ diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/kernel/drivers/net/ethernet/mellanox/mlxsw/Kconfig new file mode 100644 index 000000000..e36e12219 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -0,0 +1,43 @@ +# +# Mellanox switch drivers configuration +# + +config MLXSW_CORE + tristate "Mellanox Technologies Switch ASICs support" + ---help--- + This driver supports Mellanox Technologies Switch ASICs family. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_core. + +config MLXSW_PCI + tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" + depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE + default m + ---help--- + This is PCI bus implementation for Mellanox Technologies Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_pci. + +config MLXSW_SWITCHX2 + tristate "Mellanox Technologies SwitchX-2 support" + depends on MLXSW_CORE && NET_SWITCHDEV + default m + ---help--- + This driver supports Mellanox Technologies SwitchX-2 Ethernet + Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_switchx2. + +config MLXSW_SPECTRUM + tristate "Mellanox Technologies Spectrum support" + depends on MLXSW_CORE && NET_SWITCHDEV + default m + ---help--- + This driver supports Mellanox Technologies Spectrum Ethernet + Switch ASICs. + + To compile this driver as a module, choose M here: the + module will be called mlxsw_spectrum. diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/Makefile b/kernel/drivers/net/ethernet/mellanox/mlxsw/Makefile new file mode 100644 index 000000000..af015818f --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o +mlxsw_core-objs := core.o +obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o +mlxsw_pci-objs := pci.o +obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o +mlxsw_switchx2-objs := switchx2.o +obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o +mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ + spectrum_switchdev.o diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/cmd.h new file mode 100644 index 000000000..cd63b8263 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -0,0 +1,1115 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/cmd.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_CMD_H +#define _MLXSW_CMD_H + +#include "item.h" + +#define MLXSW_CMD_MBOX_SIZE 4096 + +static inline char *mlxsw_cmd_mbox_alloc(void) +{ + return kzalloc(MLXSW_CMD_MBOX_SIZE, GFP_KERNEL); +} + +static inline void mlxsw_cmd_mbox_free(char *mbox) +{ + kfree(mbox); +} + +static inline void mlxsw_cmd_mbox_zero(char *mbox) +{ + memset(mbox, 0, MLXSW_CMD_MBOX_SIZE); +} + +struct mlxsw_core; + +int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size); + +static inline int mlxsw_cmd_exec_in(struct mlxsw_core *mlxsw_core, u16 opcode, + u8 opcode_mod, u32 in_mod, char *in_mbox, + size_t in_mbox_size) +{ + return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false, + in_mbox, in_mbox_size, NULL, 0); +} + +static inline int mlxsw_cmd_exec_out(struct mlxsw_core *mlxsw_core, u16 opcode, + u8 opcode_mod, u32 in_mod, + bool out_mbox_direct, + char *out_mbox, size_t out_mbox_size) +{ + return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, + out_mbox_direct, NULL, 0, + out_mbox, out_mbox_size); +} + +static inline int mlxsw_cmd_exec_none(struct mlxsw_core *mlxsw_core, u16 opcode, + u8 opcode_mod, u32 in_mod) +{ + return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false, + NULL, 0, NULL, 0); +} + +enum mlxsw_cmd_opcode { + MLXSW_CMD_OPCODE_QUERY_FW = 0x004, + MLXSW_CMD_OPCODE_QUERY_BOARDINFO = 0x006, + MLXSW_CMD_OPCODE_QUERY_AQ_CAP = 0x003, + MLXSW_CMD_OPCODE_MAP_FA = 0xFFF, + MLXSW_CMD_OPCODE_UNMAP_FA = 0xFFE, + MLXSW_CMD_OPCODE_CONFIG_PROFILE = 0x100, + MLXSW_CMD_OPCODE_ACCESS_REG = 0x040, + MLXSW_CMD_OPCODE_SW2HW_DQ = 0x201, + MLXSW_CMD_OPCODE_HW2SW_DQ = 0x202, + MLXSW_CMD_OPCODE_2ERR_DQ = 0x01E, + MLXSW_CMD_OPCODE_QUERY_DQ = 0x022, + MLXSW_CMD_OPCODE_SW2HW_CQ = 0x016, + MLXSW_CMD_OPCODE_HW2SW_CQ = 0x017, + MLXSW_CMD_OPCODE_QUERY_CQ = 0x018, + MLXSW_CMD_OPCODE_SW2HW_EQ = 0x013, + MLXSW_CMD_OPCODE_HW2SW_EQ = 0x014, + MLXSW_CMD_OPCODE_QUERY_EQ = 0x015, +}; + +static inline const char *mlxsw_cmd_opcode_str(u16 opcode) +{ + switch (opcode) { + case MLXSW_CMD_OPCODE_QUERY_FW: + return "QUERY_FW"; + case MLXSW_CMD_OPCODE_QUERY_BOARDINFO: + return "QUERY_BOARDINFO"; + case MLXSW_CMD_OPCODE_QUERY_AQ_CAP: + return "QUERY_AQ_CAP"; + case MLXSW_CMD_OPCODE_MAP_FA: + return "MAP_FA"; + case MLXSW_CMD_OPCODE_UNMAP_FA: + return "UNMAP_FA"; + case MLXSW_CMD_OPCODE_CONFIG_PROFILE: + return "CONFIG_PROFILE"; + case MLXSW_CMD_OPCODE_ACCESS_REG: + return "ACCESS_REG"; + case MLXSW_CMD_OPCODE_SW2HW_DQ: + return "SW2HW_DQ"; + case MLXSW_CMD_OPCODE_HW2SW_DQ: + return "HW2SW_DQ"; + case MLXSW_CMD_OPCODE_2ERR_DQ: + return "2ERR_DQ"; + case MLXSW_CMD_OPCODE_QUERY_DQ: + return "QUERY_DQ"; + case MLXSW_CMD_OPCODE_SW2HW_CQ: + return "SW2HW_CQ"; + case MLXSW_CMD_OPCODE_HW2SW_CQ: + return "HW2SW_CQ"; + case MLXSW_CMD_OPCODE_QUERY_CQ: + return "QUERY_CQ"; + case MLXSW_CMD_OPCODE_SW2HW_EQ: + return "SW2HW_EQ"; + case MLXSW_CMD_OPCODE_HW2SW_EQ: + return "HW2SW_EQ"; + case MLXSW_CMD_OPCODE_QUERY_EQ: + return "QUERY_EQ"; + default: + return "*UNKNOWN*"; + } +} + +enum mlxsw_cmd_status { + /* Command execution succeeded. */ + MLXSW_CMD_STATUS_OK = 0x00, + /* Internal error (e.g. bus error) occurred while processing command. */ + MLXSW_CMD_STATUS_INTERNAL_ERR = 0x01, + /* Operation/command not supported or opcode modifier not supported. */ + MLXSW_CMD_STATUS_BAD_OP = 0x02, + /* Parameter not supported, parameter out of range. */ + MLXSW_CMD_STATUS_BAD_PARAM = 0x03, + /* System was not enabled or bad system state. */ + MLXSW_CMD_STATUS_BAD_SYS_STATE = 0x04, + /* Attempt to access reserved or unallocated resource, or resource in + * inappropriate ownership. + */ + MLXSW_CMD_STATUS_BAD_RESOURCE = 0x05, + /* Requested resource is currently executing a command. */ + MLXSW_CMD_STATUS_RESOURCE_BUSY = 0x06, + /* Required capability exceeds device limits. */ + MLXSW_CMD_STATUS_EXCEED_LIM = 0x08, + /* Resource is not in the appropriate state or ownership. */ + MLXSW_CMD_STATUS_BAD_RES_STATE = 0x09, + /* Index out of range (might be beyond table size or attempt to + * access a reserved resource). + */ + MLXSW_CMD_STATUS_BAD_INDEX = 0x0A, + /* NVMEM checksum/CRC failed. */ + MLXSW_CMD_STATUS_BAD_NVMEM = 0x0B, + /* Bad management packet (silently discarded). */ + MLXSW_CMD_STATUS_BAD_PKT = 0x30, +}; + +static inline const char *mlxsw_cmd_status_str(u8 status) +{ + switch (status) { + case MLXSW_CMD_STATUS_OK: + return "OK"; + case MLXSW_CMD_STATUS_INTERNAL_ERR: + return "INTERNAL_ERR"; + case MLXSW_CMD_STATUS_BAD_OP: + return "BAD_OP"; + case MLXSW_CMD_STATUS_BAD_PARAM: + return "BAD_PARAM"; + case MLXSW_CMD_STATUS_BAD_SYS_STATE: + return "BAD_SYS_STATE"; + case MLXSW_CMD_STATUS_BAD_RESOURCE: + return "BAD_RESOURCE"; + case MLXSW_CMD_STATUS_RESOURCE_BUSY: + return "RESOURCE_BUSY"; + case MLXSW_CMD_STATUS_EXCEED_LIM: + return "EXCEED_LIM"; + case MLXSW_CMD_STATUS_BAD_RES_STATE: + return "BAD_RES_STATE"; + case MLXSW_CMD_STATUS_BAD_INDEX: + return "BAD_INDEX"; + case MLXSW_CMD_STATUS_BAD_NVMEM: + return "BAD_NVMEM"; + case MLXSW_CMD_STATUS_BAD_PKT: + return "BAD_PKT"; + default: + return "*UNKNOWN*"; + } +} + +/* QUERY_FW - Query Firmware + * ------------------------- + * OpMod == 0, INMmod == 0 + * ----------------------- + * The QUERY_FW command retrieves information related to firmware, command + * interface version and the amount of resources that should be allocated to + * the firmware. + */ + +static inline int mlxsw_cmd_query_fw(struct mlxsw_core *mlxsw_core, + char *out_mbox) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_FW, + 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_fw_fw_pages + * Amount of physical memory to be allocatedfor firmware usage in 4KB pages. + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_pages, 0x00, 16, 16); + +/* cmd_mbox_query_fw_fw_rev_major + * Firmware Revision - Major + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_major, 0x00, 0, 16); + +/* cmd_mbox_query_fw_fw_rev_subminor + * Firmware Sub-minor version (Patch level) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_subminor, 0x04, 16, 16); + +/* cmd_mbox_query_fw_fw_rev_minor + * Firmware Revision - Minor + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_minor, 0x04, 0, 16); + +/* cmd_mbox_query_fw_core_clk + * Internal Clock Frequency (in MHz) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, core_clk, 0x08, 16, 16); + +/* cmd_mbox_query_fw_cmd_interface_rev + * Command Interface Interpreter Revision ID. This number is bumped up + * every time a non-backward-compatible change is done for the command + * interface. The current cmd_interface_rev is 1. + */ +MLXSW_ITEM32(cmd_mbox, query_fw, cmd_interface_rev, 0x08, 0, 16); + +/* cmd_mbox_query_fw_dt + * If set, Debug Trace is supported + */ +MLXSW_ITEM32(cmd_mbox, query_fw, dt, 0x0C, 31, 1); + +/* cmd_mbox_query_fw_api_version + * Indicates the version of the API, to enable software querying + * for compatibility. The current api_version is 1. + */ +MLXSW_ITEM32(cmd_mbox, query_fw, api_version, 0x0C, 0, 16); + +/* cmd_mbox_query_fw_fw_hour + * Firmware timestamp - hour + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_hour, 0x10, 24, 8); + +/* cmd_mbox_query_fw_fw_minutes + * Firmware timestamp - minutes + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_minutes, 0x10, 16, 8); + +/* cmd_mbox_query_fw_fw_seconds + * Firmware timestamp - seconds + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_seconds, 0x10, 8, 8); + +/* cmd_mbox_query_fw_fw_year + * Firmware timestamp - year + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_year, 0x14, 16, 16); + +/* cmd_mbox_query_fw_fw_month + * Firmware timestamp - month + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_month, 0x14, 8, 8); + +/* cmd_mbox_query_fw_fw_day + * Firmware timestamp - day + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8); + +/* cmd_mbox_query_fw_clr_int_base_offset + * Clear Interrupt register's offset from clr_int_bar register + * in PCI address space. + */ +MLXSW_ITEM64(cmd_mbox, query_fw, clr_int_base_offset, 0x20, 0, 64); + +/* cmd_mbox_query_fw_clr_int_bar + * PCI base address register (BAR) where clr_int register is located. + * 00 - BAR 0-1 (64 bit BAR) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, clr_int_bar, 0x28, 30, 2); + +/* cmd_mbox_query_fw_error_buf_offset + * Read Only buffer for internal error reports of offset + * from error_buf_bar register in PCI address space). + */ +MLXSW_ITEM64(cmd_mbox, query_fw, error_buf_offset, 0x30, 0, 64); + +/* cmd_mbox_query_fw_error_buf_size + * Internal error buffer size in DWORDs + */ +MLXSW_ITEM32(cmd_mbox, query_fw, error_buf_size, 0x38, 0, 32); + +/* cmd_mbox_query_fw_error_int_bar + * PCI base address register (BAR) where error buffer + * register is located. + * 00 - BAR 0-1 (64 bit BAR) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, error_int_bar, 0x3C, 30, 2); + +/* cmd_mbox_query_fw_doorbell_page_offset + * Offset of the doorbell page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64); + +/* cmd_mbox_query_fw_doorbell_page_bar + * PCI base address register (BAR) of the doorbell page + * 00 - BAR 0-1 (64 bit BAR) + */ +MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2); + +/* QUERY_BOARDINFO - Query Board Information + * ----------------------------------------- + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ----------------------------------- + * The QUERY_BOARDINFO command retrieves adapter specific parameters. + */ + +static inline int mlxsw_cmd_boardinfo(struct mlxsw_core *mlxsw_core, + char *out_mbox) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_BOARDINFO, + 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_boardinfo_intapin + * When PCIe interrupt messages are being used, this value is used for clearing + * an interrupt. When using MSI-X, this register is not used. + */ +MLXSW_ITEM32(cmd_mbox, boardinfo, intapin, 0x10, 24, 8); + +/* cmd_mbox_boardinfo_vsd_vendor_id + * PCISIG Vendor ID (www.pcisig.com/membership/vid_search) of the vendor + * specifying/formatting the VSD. The vsd_vendor_id identifies the management + * domain of the VSD/PSID data. Different vendors may choose different VSD/PSID + * format and encoding as long as they use their assigned vsd_vendor_id. + */ +MLXSW_ITEM32(cmd_mbox, boardinfo, vsd_vendor_id, 0x1C, 0, 16); + +/* cmd_mbox_boardinfo_vsd + * Vendor Specific Data. The VSD string that is burnt to the Flash + * with the firmware. + */ +#define MLXSW_CMD_BOARDINFO_VSD_LEN 208 +MLXSW_ITEM_BUF(cmd_mbox, boardinfo, vsd, 0x20, MLXSW_CMD_BOARDINFO_VSD_LEN); + +/* cmd_mbox_boardinfo_psid + * The PSID field is a 16-ascii (byte) character string which acts as + * the board ID. The PSID format is used in conjunction with + * Mellanox vsd_vendor_id (15B3h). + */ +#define MLXSW_CMD_BOARDINFO_PSID_LEN 16 +MLXSW_ITEM_BUF(cmd_mbox, boardinfo, psid, 0xF0, MLXSW_CMD_BOARDINFO_PSID_LEN); + +/* QUERY_AQ_CAP - Query Asynchronous Queues Capabilities + * ----------------------------------------------------- + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ----------------------------------- + * The QUERY_AQ_CAP command returns the device asynchronous queues + * capabilities supported. + */ + +static inline int mlxsw_cmd_query_aq_cap(struct mlxsw_core *mlxsw_core, + char *out_mbox) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_AQ_CAP, + 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_aq_cap_log_max_sdq_sz + * Log (base 2) of max WQEs allowed on SDQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_sdq_sz, 0x00, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_sdqs + * Maximum number of SDQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_sdqs, 0x00, 0, 8); + +/* cmd_mbox_query_aq_cap_log_max_rdq_sz + * Log (base 2) of max WQEs allowed on RDQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_rdq_sz, 0x04, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_rdqs + * Maximum number of RDQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_rdqs, 0x04, 0, 8); + +/* cmd_mbox_query_aq_cap_log_max_cq_sz + * Log (base 2) of max CQEs allowed on CQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cq_sz, 0x08, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_cqs + * Maximum number of CQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_cqs, 0x08, 0, 8); + +/* cmd_mbox_query_aq_cap_log_max_eq_sz + * Log (base 2) of max EQEs allowed on EQ. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_eq_sz, 0x0C, 24, 8); + +/* cmd_mbox_query_aq_cap_max_num_eqs + * Maximum number of EQs. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_eqs, 0x0C, 0, 8); + +/* cmd_mbox_query_aq_cap_max_sg_sq + * The maximum S/G list elements in an DSQ. DSQ must not contain + * more S/G entries than indicated here. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_sg_sq, 0x10, 8, 8); + +/* cmd_mbox_query_aq_cap_ + * The maximum S/G list elements in an DRQ. DRQ must not contain + * more S/G entries than indicated here. + */ +MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_sg_rq, 0x10, 0, 8); + +/* MAP_FA - Map Firmware Area + * -------------------------- + * OpMod == 0 (N/A), INMmod == Number of VPM entries + * ------------------------------------------------- + * The MAP_FA command passes physical pages to the switch. These pages + * are used to store the device firmware. MAP_FA can be executed multiple + * times until all the firmware area is mapped (the size that should be + * mapped is retrieved through the QUERY_FW command). All required pages + * must be mapped to finish the initialization phase. Physical memory + * passed in this command must be pinned. + */ + +#define MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX 32 + +static inline int mlxsw_cmd_map_fa(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 vpm_entries_count) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_MAP_FA, + 0, vpm_entries_count, + in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_map_fa_pa + * Physical Address. + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, map_fa, pa, 0x00, 12, 52, 0x08, 0x00, true); + +/* cmd_mbox_map_fa_log2size + * Log (base 2) of the size in 4KB pages of the physical and contiguous memory + * that starts at PA_L/H. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, map_fa, log2size, 0x00, 0, 5, 0x08, 0x04, false); + +/* UNMAP_FA - Unmap Firmware Area + * ------------------------------ + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ----------------------------------- + * The UNMAP_FA command unload the firmware and unmaps all the + * firmware area. After this command is completed the device will not access + * the pages that were mapped to the firmware area. After executing UNMAP_FA + * command, software reset must be done prior to execution of MAP_FW command. + */ + +static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_UNMAP_FA, 0, 0); +} + +/* CONFIG_PROFILE (Set) - Configure Switch Profile + * ------------------------------ + * OpMod == 1 (Set), INMmod == 0 (N/A) + * ----------------------------------- + * The CONFIG_PROFILE command sets the switch profile. The command can be + * executed on the device only once at startup in order to allocate and + * configure all switch resources and prepare it for operational mode. + * It is not possible to change the device profile after the chip is + * in operational mode. + * Failure of the CONFIG_PROFILE command leaves the hardware in an indeterminate + * state therefore it is required to perform software reset to the device + * following an unsuccessful completion of the command. It is required + * to perform software reset to the device to change an existing profile. + */ + +static inline int mlxsw_cmd_config_profile_set(struct mlxsw_core *mlxsw_core, + char *in_mbox) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_CONFIG_PROFILE, + 1, 0, in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_config_profile_set_max_vepa_channels + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_vepa_channels, 0x0C, 0, 1); + +/* cmd_mbox_config_profile_set_max_lag + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_lag, 0x0C, 1, 1); + +/* cmd_mbox_config_profile_set_max_port_per_lag + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_port_per_lag, 0x0C, 2, 1); + +/* cmd_mbox_config_profile_set_max_mid + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_mid, 0x0C, 3, 1); + +/* cmd_mbox_config_profile_set_max_pgt + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_pgt, 0x0C, 4, 1); + +/* cmd_mbox_config_profile_set_max_system_port + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_system_port, 0x0C, 5, 1); + +/* cmd_mbox_config_profile_set_max_vlan_groups + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_vlan_groups, 0x0C, 6, 1); + +/* cmd_mbox_config_profile_set_max_regions + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_regions, 0x0C, 7, 1); + +/* cmd_mbox_config_profile_set_flood_mode + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_flood_mode, 0x0C, 8, 1); + +/* cmd_mbox_config_profile_set_max_flood_tables + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_flood_tables, 0x0C, 9, 1); + +/* cmd_mbox_config_profile_set_max_ib_mc + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_ib_mc, 0x0C, 12, 1); + +/* cmd_mbox_config_profile_set_max_pkey + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_max_pkey, 0x0C, 13, 1); + +/* cmd_mbox_config_profile_set_adaptive_routing_group_cap + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, + set_adaptive_routing_group_cap, 0x0C, 14, 1); + +/* cmd_mbox_config_profile_set_ar_sec + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1); + +/* cmd_mbox_config_profile_max_vepa_channels + * Maximum number of VEPA channels per port (0 through 16) + * 0 - multi-channel VEPA is disabled + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_vepa_channels, 0x10, 0, 8); + +/* cmd_mbox_config_profile_max_lag + * Maximum number of LAG IDs requested. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_lag, 0x14, 0, 16); + +/* cmd_mbox_config_profile_max_port_per_lag + * Maximum number of ports per LAG requested. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_port_per_lag, 0x18, 0, 16); + +/* cmd_mbox_config_profile_max_mid + * Maximum Multicast IDs. + * Multicast IDs are allocated from 0 to max_mid-1 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_mid, 0x1C, 0, 16); + +/* cmd_mbox_config_profile_max_pgt + * Maximum records in the Port Group Table per Switch Partition. + * Port Group Table indexes are from 0 to max_pgt-1 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_pgt, 0x20, 0, 16); + +/* cmd_mbox_config_profile_max_system_port + * The maximum number of system ports that can be allocated. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_system_port, 0x24, 0, 16); + +/* cmd_mbox_config_profile_max_vlan_groups + * Maximum number VLAN Groups for VLAN binding. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_vlan_groups, 0x28, 0, 12); + +/* cmd_mbox_config_profile_max_regions + * Maximum number of TCAM Regions. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_regions, 0x2C, 0, 16); + +/* cmd_mbox_config_profile_max_flood_tables + * Maximum number of single-entry flooding tables. Different flooding tables + * can be associated with different packet types. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_flood_tables, 0x30, 16, 4); + +/* cmd_mbox_config_profile_max_vid_flood_tables + * Maximum number of per-vid flooding tables. Flooding tables are associated + * to the different packet types for the different switch partitions. + * Table size is 4K entries covering all VID space. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_vid_flood_tables, 0x30, 8, 4); + +/* cmd_mbox_config_profile_flood_mode + * Flooding mode to use. + * 0-2 - Backward compatible modes for SwitchX devices. + * 3 - Mixed mode, where: + * max_flood_tables indicates the number of single-entry tables. + * max_vid_flood_tables indicates the number of per-VID tables. + * max_fid_offset_flood_tables indicates the number of FID-offset tables. + * max_fid_flood_tables indicates the number of per-FID tables. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 2); + +/* cmd_mbox_config_profile_max_fid_offset_flood_tables + * Maximum number of FID-offset flooding tables. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, + max_fid_offset_flood_tables, 0x34, 24, 4); + +/* cmd_mbox_config_profile_fid_offset_flood_table_size + * The size (number of entries) of each FID-offset flood table. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, + fid_offset_flood_table_size, 0x34, 0, 16); + +/* cmd_mbox_config_profile_max_fid_flood_tables + * Maximum number of per-FID flooding tables. + * + * Note: This flooding tables cover special FIDs only (vFIDs), starting at + * FID value 4K and higher. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_fid_flood_tables, 0x38, 24, 4); + +/* cmd_mbox_config_profile_fid_flood_table_size + * The size (number of entries) of each per-FID table. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, fid_flood_table_size, 0x38, 0, 16); + +/* cmd_mbox_config_profile_max_ib_mc + * Maximum number of multicast FDB records for InfiniBand + * FDB (in 512 chunks) per InfiniBand switch partition. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_ib_mc, 0x40, 0, 15); + +/* cmd_mbox_config_profile_max_pkey + * Maximum per port PKEY table size (for PKEY enforcement) + */ +MLXSW_ITEM32(cmd_mbox, config_profile, max_pkey, 0x44, 0, 15); + +/* cmd_mbox_config_profile_ar_sec + * Primary/secondary capability + * Describes the number of adaptive routing sub-groups + * 0 - disable primary/secondary (single group) + * 1 - enable primary/secondary (2 sub-groups) + * 2 - 3 sub-groups: Not supported in SwitchX, SwitchX-2 + * 3 - 4 sub-groups: Not supported in SwitchX, SwitchX-2 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, ar_sec, 0x4C, 24, 2); + +/* cmd_mbox_config_profile_adaptive_routing_group_cap + * Adaptive Routing Group Capability. Indicates the number of AR groups + * supported. Note that when Primary/secondary is enabled, each + * primary/secondary couple consumes 2 adaptive routing entries. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); + +/* cmd_mbox_config_profile_arn + * Adaptive Routing Notification Enable + * Not supported in SwitchX, SwitchX-2 + */ +MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); + +/* cmd_mbox_config_profile_swid_config_mask + * Modify Switch Partition Configuration mask. When set, the configu- + * ration value for the Switch Partition are taken from the mailbox. + * When clear, the current configuration values are used. + * Bit 0 - set type + * Bit 1 - properties + * Other - reserved + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_mask, + 0x60, 24, 8, 0x08, 0x00, false); + +/* cmd_mbox_config_profile_swid_config_type + * Switch Partition type. + * 0000 - disabled (Switch Partition does not exist) + * 0001 - InfiniBand + * 0010 - Ethernet + * 1000 - router port (SwitchX-2 only) + * Other - reserved + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_type, + 0x60, 20, 4, 0x08, 0x00, false); + +/* cmd_mbox_config_profile_swid_config_properties + * Switch Partition properties. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_properties, + 0x60, 0, 8, 0x08, 0x00, false); + +/* ACCESS_REG - Access EMAD Supported Register + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == 0 (N/A) + * ------------------------------------- + * The ACCESS_REG command supports accessing device registers. This access + * is mainly used for bootstrapping. + */ + +static inline int mlxsw_cmd_access_reg(struct mlxsw_core *mlxsw_core, + char *in_mbox, char *out_mbox) +{ + return mlxsw_cmd_exec(mlxsw_core, MLXSW_CMD_OPCODE_ACCESS_REG, + 0, 0, false, in_mbox, MLXSW_CMD_MBOX_SIZE, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* SW2HW_DQ - Software to Hardware DQ + * ---------------------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The SW2HW_DQ command transitions a descriptor queue from software to + * hardware ownership. The command enables posting WQEs and ringing DoorBells + * on the descriptor queue. + */ + +static inline int __mlxsw_cmd_sw2hw_dq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 dq_number, + u8 opcode_mod) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_DQ, + opcode_mod, dq_number, + in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +enum { + MLXSW_CMD_OPCODE_MOD_SDQ = 0, + MLXSW_CMD_OPCODE_MOD_RDQ = 1, +}; + +static inline int mlxsw_cmd_sw2hw_sdq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 dq_number) +{ + return __mlxsw_cmd_sw2hw_dq(mlxsw_core, in_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_sw2hw_rdq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 dq_number) +{ + return __mlxsw_cmd_sw2hw_dq(mlxsw_core, in_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* cmd_mbox_sw2hw_dq_cq + * Number of the CQ that this Descriptor Queue reports completions to. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, cq, 0x00, 24, 8); + +/* cmd_mbox_sw2hw_dq_sdq_tclass + * SDQ: CPU Egress TClass + * RDQ: Reserved + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, sdq_tclass, 0x00, 16, 6); + +/* cmd_mbox_sw2hw_dq_log2_dq_sz + * Log (base 2) of the Descriptor Queue size in 4KB pages. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, log2_dq_sz, 0x00, 0, 6); + +/* cmd_mbox_sw2hw_dq_pa + * Physical Address. + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_dq, pa, 0x10, 12, 52, 0x08, 0x00, true); + +/* HW2SW_DQ - Hardware to Software DQ + * ---------------------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The HW2SW_DQ command transitions a descriptor queue from hardware to + * software ownership. Incoming packets on the DQ are silently discarded, + * SW should not post descriptors on nonoperational DQs. + */ + +static inline int __mlxsw_cmd_hw2sw_dq(struct mlxsw_core *mlxsw_core, + u32 dq_number, u8 opcode_mod) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_DQ, + opcode_mod, dq_number); +} + +static inline int mlxsw_cmd_hw2sw_sdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_hw2sw_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_hw2sw_rdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_hw2sw_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* 2ERR_DQ - To Error DQ + * --------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The 2ERR_DQ command transitions the DQ into the error state from the state + * in which it has been. While the command is executed, some in-process + * descriptors may complete. Once the DQ transitions into the error state, + * if there are posted descriptors on the RDQ/SDQ, the hardware writes + * a completion with error (flushed) for all descriptors posted in the RDQ/SDQ. + * When the command is completed successfully, the DQ is already in + * the error state. + */ + +static inline int __mlxsw_cmd_2err_dq(struct mlxsw_core *mlxsw_core, + u32 dq_number, u8 opcode_mod) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_2ERR_DQ, + opcode_mod, dq_number); +} + +static inline int mlxsw_cmd_2err_sdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_2err_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_2err_rdq(struct mlxsw_core *mlxsw_core, + u32 dq_number) +{ + return __mlxsw_cmd_2err_dq(mlxsw_core, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* QUERY_DQ - Query DQ + * --------------------- + * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ) + * INMmod == DQ number + * ---------------------------------------------- + * The QUERY_DQ command retrieves a snapshot of DQ parameters from the hardware. + * + * Note: Output mailbox has the same format as SW2HW_DQ. + */ + +static inline int __mlxsw_cmd_query_dq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 dq_number, + u8 opcode_mod) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_2ERR_DQ, + opcode_mod, dq_number, false, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +static inline int mlxsw_cmd_query_sdq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 dq_number) +{ + return __mlxsw_cmd_query_dq(mlxsw_core, out_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_SDQ); +} + +static inline int mlxsw_cmd_query_rdq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 dq_number) +{ + return __mlxsw_cmd_query_dq(mlxsw_core, out_mbox, dq_number, + MLXSW_CMD_OPCODE_MOD_RDQ); +} + +/* SW2HW_CQ - Software to Hardware CQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == CQ number + * ------------------------------------- + * The SW2HW_CQ command transfers ownership of a CQ context entry from software + * to hardware. The command takes the CQ context entry from the input mailbox + * and stores it in the CQC in the ownership of the hardware. The command fails + * if the requested CQC entry is already in the ownership of the hardware. + */ + +static inline int mlxsw_cmd_sw2hw_cq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 cq_number) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_CQ, + 0, cq_number, in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_sw2hw_cq_cv + * CQE Version. + * 0 - CQE Version 0, 1 - CQE Version 1 + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4); + +/* cmd_mbox_sw2hw_cq_c_eqn + * Event Queue this CQ reports completion events to. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, c_eqn, 0x00, 24, 1); + +/* cmd_mbox_sw2hw_cq_oi + * When set, overrun ignore is enabled. When set, updates of + * CQ consumer counter (poll for completion) or Request completion + * notifications (Arm CQ) DoorBells should not be rung on that CQ. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, oi, 0x00, 12, 1); + +/* cmd_mbox_sw2hw_cq_st + * Event delivery state machine + * 0x0 - FIRED + * 0x1 - ARMED (Request for Notification) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, st, 0x00, 8, 1); + +/* cmd_mbox_sw2hw_cq_log_cq_size + * Log (base 2) of the CQ size (in entries). + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, log_cq_size, 0x00, 0, 4); + +/* cmd_mbox_sw2hw_cq_producer_counter + * Producer Counter. The counter is incremented for each CQE that is + * written by the HW to the CQ. + * Maintained by HW (valid for the QUERY_CQ command only) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_cq, producer_counter, 0x04, 0, 16); + +/* cmd_mbox_sw2hw_cq_pa + * Physical Address. + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_cq, pa, 0x10, 11, 53, 0x08, 0x00, true); + +/* HW2SW_CQ - Hardware to Software CQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == CQ number + * ------------------------------------- + * The HW2SW_CQ command transfers ownership of a CQ context entry from hardware + * to software. The CQC entry is invalidated as a result of this command. + */ + +static inline int mlxsw_cmd_hw2sw_cq(struct mlxsw_core *mlxsw_core, + u32 cq_number) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_CQ, + 0, cq_number); +} + +/* QUERY_CQ - Query CQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == CQ number + * ------------------------------------- + * The QUERY_CQ command retrieves a snapshot of the current CQ context entry. + * The command stores the snapshot in the output mailbox in the software format. + * Note that the CQ context state and values are not affected by the QUERY_CQ + * command. The QUERY_CQ command is for debug purposes only. + * + * Note: Output mailbox has the same format as SW2HW_CQ. + */ + +static inline int mlxsw_cmd_query_cq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 cq_number) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_CQ, + 0, cq_number, false, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* SW2HW_EQ - Software to Hardware EQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == EQ number + * ------------------------------------- + * The SW2HW_EQ command transfers ownership of an EQ context entry from software + * to hardware. The command takes the EQ context entry from the input mailbox + * and stores it in the EQC in the ownership of the hardware. The command fails + * if the requested EQC entry is already in the ownership of the hardware. + */ + +static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core, + char *in_mbox, u32 eq_number) +{ + return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_EQ, + 0, eq_number, in_mbox, MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_sw2hw_eq_int_msix + * When set, MSI-X cycles will be generated by this EQ. + * When cleared, an interrupt will be generated by this EQ. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1); + +/* cmd_mbox_sw2hw_eq_int_oi + * When set, overrun ignore is enabled. + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, oi, 0x00, 12, 1); + +/* cmd_mbox_sw2hw_eq_int_st + * Event delivery state machine + * 0x0 - FIRED + * 0x1 - ARMED (Request for Notification) + * 0x11 - Always ARMED + * other - reserved + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, st, 0x00, 8, 2); + +/* cmd_mbox_sw2hw_eq_int_log_eq_size + * Log (base 2) of the EQ size (in entries). + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, log_eq_size, 0x00, 0, 4); + +/* cmd_mbox_sw2hw_eq_int_producer_counter + * Producer Counter. The counter is incremented for each EQE that is written + * by the HW to the EQ. + * Maintained by HW (valid for the QUERY_EQ command only) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_eq, producer_counter, 0x04, 0, 16); + +/* cmd_mbox_sw2hw_eq_int_pa + * Physical Address. + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_eq, pa, 0x10, 11, 53, 0x08, 0x00, true); + +/* HW2SW_EQ - Hardware to Software EQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == EQ number + * ------------------------------------- + */ + +static inline int mlxsw_cmd_hw2sw_eq(struct mlxsw_core *mlxsw_core, + u32 eq_number) +{ + return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_EQ, + 0, eq_number); +} + +/* QUERY_EQ - Query EQ + * ---------------------------------- + * OpMod == 0 (N/A), INMmod == EQ number + * ------------------------------------- + * + * Note: Output mailbox has the same format as SW2HW_EQ. + */ + +static inline int mlxsw_cmd_query_eq(struct mlxsw_core *mlxsw_core, + char *out_mbox, u32 eq_number) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_EQ, + 0, eq_number, false, + out_mbox, MLXSW_CMD_MBOX_SIZE); +} + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/core.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/core.c new file mode 100644 index 000000000..97f0d93ca --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -0,0 +1,1299 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/err.h> +#include <linux/if_link.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/u64_stats_sync.h> +#include <linux/netdevice.h> +#include <linux/wait.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/gfp.h> +#include <linux/random.h> +#include <linux/jiffies.h> +#include <linux/mutex.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> +#include <asm/byteorder.h> + +#include "core.h" +#include "item.h" +#include "cmd.h" +#include "port.h" +#include "trap.h" +#include "emad.h" +#include "reg.h" + +static LIST_HEAD(mlxsw_core_driver_list); +static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); + +static const char mlxsw_core_driver_name[] = "mlxsw_core"; + +static struct dentry *mlxsw_core_dbg_root; + +struct mlxsw_core_pcpu_stats { + u64 trap_rx_packets[MLXSW_TRAP_ID_MAX]; + u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX]; + u64 port_rx_packets[MLXSW_PORT_MAX_PORTS]; + u64 port_rx_bytes[MLXSW_PORT_MAX_PORTS]; + struct u64_stats_sync syncp; + u32 trap_rx_dropped[MLXSW_TRAP_ID_MAX]; + u32 port_rx_dropped[MLXSW_PORT_MAX_PORTS]; + u32 trap_rx_invalid; + u32 port_rx_invalid; +}; + +struct mlxsw_core { + struct mlxsw_driver *driver; + const struct mlxsw_bus *bus; + void *bus_priv; + const struct mlxsw_bus_info *bus_info; + struct list_head rx_listener_list; + struct list_head event_listener_list; + struct { + struct sk_buff *resp_skb; + u64 tid; + wait_queue_head_t wait; + bool trans_active; + struct mutex lock; /* One EMAD transaction at a time. */ + bool use_emad; + } emad; + struct mlxsw_core_pcpu_stats __percpu *pcpu_stats; + struct dentry *dbg_dir; + struct { + struct debugfs_blob_wrapper vsd_blob; + struct debugfs_blob_wrapper psid_blob; + } dbg; + unsigned long driver_priv[0]; + /* driver_priv has to be always the last item */ +}; + +struct mlxsw_rx_listener_item { + struct list_head list; + struct mlxsw_rx_listener rxl; + void *priv; +}; + +struct mlxsw_event_listener_item { + struct list_head list; + struct mlxsw_event_listener el; + void *priv; +}; + +/****************** + * EMAD processing + ******************/ + +/* emad_eth_hdr_dmac + * Destination MAC in EMAD's Ethernet header. + * Must be set to 01:02:c9:00:00:01 + */ +MLXSW_ITEM_BUF(emad, eth_hdr, dmac, 0x00, 6); + +/* emad_eth_hdr_smac + * Source MAC in EMAD's Ethernet header. + * Must be set to 00:02:c9:01:02:03 + */ +MLXSW_ITEM_BUF(emad, eth_hdr, smac, 0x06, 6); + +/* emad_eth_hdr_ethertype + * Ethertype in EMAD's Ethernet header. + * Must be set to 0x8932 + */ +MLXSW_ITEM32(emad, eth_hdr, ethertype, 0x0C, 16, 16); + +/* emad_eth_hdr_mlx_proto + * Mellanox protocol. + * Must be set to 0x0. + */ +MLXSW_ITEM32(emad, eth_hdr, mlx_proto, 0x0C, 8, 8); + +/* emad_eth_hdr_ver + * Mellanox protocol version. + * Must be set to 0x0. + */ +MLXSW_ITEM32(emad, eth_hdr, ver, 0x0C, 4, 4); + +/* emad_op_tlv_type + * Type of the TLV. + * Must be set to 0x1 (operation TLV). + */ +MLXSW_ITEM32(emad, op_tlv, type, 0x00, 27, 5); + +/* emad_op_tlv_len + * Length of the operation TLV in u32. + * Must be set to 0x4. + */ +MLXSW_ITEM32(emad, op_tlv, len, 0x00, 16, 11); + +/* emad_op_tlv_dr + * Direct route bit. Setting to 1 indicates the EMAD is a direct route + * EMAD. DR TLV must follow. + * + * Note: Currently not supported and must not be set. + */ +MLXSW_ITEM32(emad, op_tlv, dr, 0x00, 15, 1); + +/* emad_op_tlv_status + * Returned status in case of EMAD response. Must be set to 0 in case + * of EMAD request. + * 0x0 - success + * 0x1 - device is busy. Requester should retry + * 0x2 - Mellanox protocol version not supported + * 0x3 - unknown TLV + * 0x4 - register not supported + * 0x5 - operation class not supported + * 0x6 - EMAD method not supported + * 0x7 - bad parameter (e.g. port out of range) + * 0x8 - resource not available + * 0x9 - message receipt acknowledgment. Requester should retry + * 0x70 - internal error + */ +MLXSW_ITEM32(emad, op_tlv, status, 0x00, 8, 7); + +/* emad_op_tlv_register_id + * Register ID of register within register TLV. + */ +MLXSW_ITEM32(emad, op_tlv, register_id, 0x04, 16, 16); + +/* emad_op_tlv_r + * Response bit. Setting to 1 indicates Response, otherwise request. + */ +MLXSW_ITEM32(emad, op_tlv, r, 0x04, 15, 1); + +/* emad_op_tlv_method + * EMAD method type. + * 0x1 - query + * 0x2 - write + * 0x3 - send (currently not supported) + * 0x4 - event + */ +MLXSW_ITEM32(emad, op_tlv, method, 0x04, 8, 7); + +/* emad_op_tlv_class + * EMAD operation class. Must be set to 0x1 (REG_ACCESS). + */ +MLXSW_ITEM32(emad, op_tlv, class, 0x04, 0, 8); + +/* emad_op_tlv_tid + * EMAD transaction ID. Used for pairing request and response EMADs. + */ +MLXSW_ITEM64(emad, op_tlv, tid, 0x08, 0, 64); + +/* emad_reg_tlv_type + * Type of the TLV. + * Must be set to 0x3 (register TLV). + */ +MLXSW_ITEM32(emad, reg_tlv, type, 0x00, 27, 5); + +/* emad_reg_tlv_len + * Length of the operation TLV in u32. + */ +MLXSW_ITEM32(emad, reg_tlv, len, 0x00, 16, 11); + +/* emad_end_tlv_type + * Type of the TLV. + * Must be set to 0x0 (end TLV). + */ +MLXSW_ITEM32(emad, end_tlv, type, 0x00, 27, 5); + +/* emad_end_tlv_len + * Length of the end TLV in u32. + * Must be set to 1. + */ +MLXSW_ITEM32(emad, end_tlv, len, 0x00, 16, 11); + +enum mlxsw_core_reg_access_type { + MLXSW_CORE_REG_ACCESS_TYPE_QUERY, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE, +}; + +static inline const char * +mlxsw_core_reg_access_type_str(enum mlxsw_core_reg_access_type type) +{ + switch (type) { + case MLXSW_CORE_REG_ACCESS_TYPE_QUERY: + return "query"; + case MLXSW_CORE_REG_ACCESS_TYPE_WRITE: + return "write"; + } + BUG(); +} + +static void mlxsw_emad_pack_end_tlv(char *end_tlv) +{ + mlxsw_emad_end_tlv_type_set(end_tlv, MLXSW_EMAD_TLV_TYPE_END); + mlxsw_emad_end_tlv_len_set(end_tlv, MLXSW_EMAD_END_TLV_LEN); +} + +static void mlxsw_emad_pack_reg_tlv(char *reg_tlv, + const struct mlxsw_reg_info *reg, + char *payload) +{ + mlxsw_emad_reg_tlv_type_set(reg_tlv, MLXSW_EMAD_TLV_TYPE_REG); + mlxsw_emad_reg_tlv_len_set(reg_tlv, reg->len / sizeof(u32) + 1); + memcpy(reg_tlv + sizeof(u32), payload, reg->len); +} + +static void mlxsw_emad_pack_op_tlv(char *op_tlv, + const struct mlxsw_reg_info *reg, + enum mlxsw_core_reg_access_type type, + struct mlxsw_core *mlxsw_core) +{ + mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP); + mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN); + mlxsw_emad_op_tlv_dr_set(op_tlv, 0); + mlxsw_emad_op_tlv_status_set(op_tlv, 0); + mlxsw_emad_op_tlv_register_id_set(op_tlv, reg->id); + mlxsw_emad_op_tlv_r_set(op_tlv, MLXSW_EMAD_OP_TLV_REQUEST); + if (type == MLXSW_CORE_REG_ACCESS_TYPE_QUERY) + mlxsw_emad_op_tlv_method_set(op_tlv, + MLXSW_EMAD_OP_TLV_METHOD_QUERY); + else + mlxsw_emad_op_tlv_method_set(op_tlv, + MLXSW_EMAD_OP_TLV_METHOD_WRITE); + mlxsw_emad_op_tlv_class_set(op_tlv, + MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS); + mlxsw_emad_op_tlv_tid_set(op_tlv, mlxsw_core->emad.tid); +} + +static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb) +{ + char *eth_hdr = skb_push(skb, MLXSW_EMAD_ETH_HDR_LEN); + + mlxsw_emad_eth_hdr_dmac_memcpy_to(eth_hdr, MLXSW_EMAD_EH_DMAC); + mlxsw_emad_eth_hdr_smac_memcpy_to(eth_hdr, MLXSW_EMAD_EH_SMAC); + mlxsw_emad_eth_hdr_ethertype_set(eth_hdr, MLXSW_EMAD_EH_ETHERTYPE); + mlxsw_emad_eth_hdr_mlx_proto_set(eth_hdr, MLXSW_EMAD_EH_MLX_PROTO); + mlxsw_emad_eth_hdr_ver_set(eth_hdr, MLXSW_EMAD_EH_PROTO_VERSION); + + skb_reset_mac_header(skb); + + return 0; +} + +static void mlxsw_emad_construct(struct sk_buff *skb, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + struct mlxsw_core *mlxsw_core) +{ + char *buf; + + buf = skb_push(skb, MLXSW_EMAD_END_TLV_LEN * sizeof(u32)); + mlxsw_emad_pack_end_tlv(buf); + + buf = skb_push(skb, reg->len + sizeof(u32)); + mlxsw_emad_pack_reg_tlv(buf, reg, payload); + + buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)); + mlxsw_emad_pack_op_tlv(buf, reg, type, mlxsw_core); + + mlxsw_emad_construct_eth_hdr(skb); +} + +static char *mlxsw_emad_op_tlv(const struct sk_buff *skb) +{ + return ((char *) (skb->data + MLXSW_EMAD_ETH_HDR_LEN)); +} + +static char *mlxsw_emad_reg_tlv(const struct sk_buff *skb) +{ + return ((char *) (skb->data + MLXSW_EMAD_ETH_HDR_LEN + + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32))); +} + +static char *mlxsw_emad_reg_payload(const char *op_tlv) +{ + return ((char *) (op_tlv + (MLXSW_EMAD_OP_TLV_LEN + 1) * sizeof(u32))); +} + +static u64 mlxsw_emad_get_tid(const struct sk_buff *skb) +{ + char *op_tlv; + + op_tlv = mlxsw_emad_op_tlv(skb); + return mlxsw_emad_op_tlv_tid_get(op_tlv); +} + +static bool mlxsw_emad_is_resp(const struct sk_buff *skb) +{ + char *op_tlv; + + op_tlv = mlxsw_emad_op_tlv(skb); + return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE); +} + +#define MLXSW_EMAD_TIMEOUT_MS 200 + +static int __mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + int err; + int ret; + + mlxsw_core->emad.trans_active = true; + + err = mlxsw_core_skb_transmit(mlxsw_core->driver_priv, skb, tx_info); + if (err) { + dev_err(mlxsw_core->bus_info->dev, "Failed to transmit EMAD (tid=%llx)\n", + mlxsw_core->emad.tid); + dev_kfree_skb(skb); + goto trans_inactive_out; + } + + ret = wait_event_timeout(mlxsw_core->emad.wait, + !(mlxsw_core->emad.trans_active), + msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS)); + if (!ret) { + dev_warn(mlxsw_core->bus_info->dev, "EMAD timed-out (tid=%llx)\n", + mlxsw_core->emad.tid); + err = -EIO; + goto trans_inactive_out; + } + + return 0; + +trans_inactive_out: + mlxsw_core->emad.trans_active = false; + return err; +} + +static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, + char *op_tlv) +{ + enum mlxsw_emad_op_tlv_status status; + u64 tid; + + status = mlxsw_emad_op_tlv_status_get(op_tlv); + tid = mlxsw_emad_op_tlv_tid_get(op_tlv); + + switch (status) { + case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: + return 0; + case MLXSW_EMAD_OP_TLV_STATUS_BUSY: + case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: + dev_warn(mlxsw_core->bus_info->dev, "Reg access status again (tid=%llx,status=%x(%s))\n", + tid, status, mlxsw_emad_op_tlv_status_str(status)); + return -EAGAIN; + case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: + case MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED: + case MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER: + case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: + case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: + default: + dev_err(mlxsw_core->bus_info->dev, "Reg access status failed (tid=%llx,status=%x(%s))\n", + tid, status, mlxsw_emad_op_tlv_status_str(status)); + return -EIO; + } +} + +static int mlxsw_emad_process_status_skb(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb) +{ + return mlxsw_emad_process_status(mlxsw_core, mlxsw_emad_op_tlv(skb)); +} + +static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + struct sk_buff *trans_skb; + int n_retry; + int err; + + n_retry = 0; +retry: + /* We copy the EMAD to a new skb, since we might need + * to retransmit it in case of failure. + */ + trans_skb = skb_copy(skb, GFP_KERNEL); + if (!trans_skb) { + err = -ENOMEM; + goto out; + } + + err = __mlxsw_emad_transmit(mlxsw_core, trans_skb, tx_info); + if (!err) { + struct sk_buff *resp_skb = mlxsw_core->emad.resp_skb; + + err = mlxsw_emad_process_status_skb(mlxsw_core, resp_skb); + if (err) + dev_kfree_skb(resp_skb); + if (!err || err != -EAGAIN) + goto out; + } + if (n_retry++ < MLXSW_EMAD_MAX_RETRY) + goto retry; + +out: + dev_kfree_skb(skb); + mlxsw_core->emad.tid++; + return err; +} + +static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_core *mlxsw_core = priv; + + if (mlxsw_emad_is_resp(skb) && + mlxsw_core->emad.trans_active && + mlxsw_emad_get_tid(skb) == mlxsw_core->emad.tid) { + mlxsw_core->emad.resp_skb = skb; + mlxsw_core->emad.trans_active = false; + wake_up(&mlxsw_core->emad.wait); + } else { + dev_kfree_skb(skb); + } +} + +static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = { + .func = mlxsw_emad_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ETHEMAD, +}; + +static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + MLXSW_TRAP_ID_ETHEMAD); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); +} + +static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) +{ + int err; + + /* Set the upper 32 bits of the transaction ID field to a random + * number. This allows us to discard EMADs addressed to other + * devices. + */ + get_random_bytes(&mlxsw_core->emad.tid, 4); + mlxsw_core->emad.tid = mlxsw_core->emad.tid << 32; + + init_waitqueue_head(&mlxsw_core->emad.wait); + mlxsw_core->emad.trans_active = false; + mutex_init(&mlxsw_core->emad.lock); + + err = mlxsw_core_rx_listener_register(mlxsw_core, + &mlxsw_emad_rx_listener, + mlxsw_core); + if (err) + return err; + + err = mlxsw_emad_traps_set(mlxsw_core); + if (err) + goto err_emad_trap_set; + + mlxsw_core->emad.use_emad = true; + + return 0; + +err_emad_trap_set: + mlxsw_core_rx_listener_unregister(mlxsw_core, + &mlxsw_emad_rx_listener, + mlxsw_core); + return err; +} + +static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) +{ + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + + mlxsw_core->emad.use_emad = false; + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, + MLXSW_TRAP_ID_ETHEMAD); + mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); + + mlxsw_core_rx_listener_unregister(mlxsw_core, + &mlxsw_emad_rx_listener, + mlxsw_core); +} + +static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, + u16 reg_len) +{ + struct sk_buff *skb; + u16 emad_len; + + emad_len = (reg_len + sizeof(u32) + MLXSW_EMAD_ETH_HDR_LEN + + (MLXSW_EMAD_OP_TLV_LEN + MLXSW_EMAD_END_TLV_LEN) * + sizeof(u32) + mlxsw_core->driver->txhdr_len); + if (emad_len > MLXSW_EMAD_MAX_FRAME_LEN) + return NULL; + + skb = netdev_alloc_skb(NULL, emad_len); + if (!skb) + return NULL; + memset(skb->data, 0, emad_len); + skb_reserve(skb, emad_len); + + return skb; +} + +/***************** + * Core functions + *****************/ + +static int mlxsw_core_rx_stats_dbg_read(struct seq_file *file, void *data) +{ + struct mlxsw_core *mlxsw_core = file->private; + struct mlxsw_core_pcpu_stats *p; + u64 rx_packets, rx_bytes; + u64 tmp_rx_packets, tmp_rx_bytes; + u32 rx_dropped, rx_invalid; + unsigned int start; + int i; + int j; + static const char hdr[] = + " NUM RX_PACKETS RX_BYTES RX_DROPPED\n"; + + seq_printf(file, hdr); + for (i = 0; i < MLXSW_TRAP_ID_MAX; i++) { + rx_packets = 0; + rx_bytes = 0; + rx_dropped = 0; + for_each_possible_cpu(j) { + p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); + do { + start = u64_stats_fetch_begin(&p->syncp); + tmp_rx_packets = p->trap_rx_packets[i]; + tmp_rx_bytes = p->trap_rx_bytes[i]; + } while (u64_stats_fetch_retry(&p->syncp, start)); + + rx_packets += tmp_rx_packets; + rx_bytes += tmp_rx_bytes; + rx_dropped += p->trap_rx_dropped[i]; + } + seq_printf(file, "trap %3d %12llu %12llu %10u\n", + i, rx_packets, rx_bytes, rx_dropped); + } + rx_invalid = 0; + for_each_possible_cpu(j) { + p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); + rx_invalid += p->trap_rx_invalid; + } + seq_printf(file, "trap INV %10u\n", + rx_invalid); + + for (i = 0; i < MLXSW_PORT_MAX_PORTS; i++) { + rx_packets = 0; + rx_bytes = 0; + rx_dropped = 0; + for_each_possible_cpu(j) { + p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); + do { + start = u64_stats_fetch_begin(&p->syncp); + tmp_rx_packets = p->port_rx_packets[i]; + tmp_rx_bytes = p->port_rx_bytes[i]; + } while (u64_stats_fetch_retry(&p->syncp, start)); + + rx_packets += tmp_rx_packets; + rx_bytes += tmp_rx_bytes; + rx_dropped += p->port_rx_dropped[i]; + } + seq_printf(file, "port %3d %12llu %12llu %10u\n", + i, rx_packets, rx_bytes, rx_dropped); + } + rx_invalid = 0; + for_each_possible_cpu(j) { + p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); + rx_invalid += p->port_rx_invalid; + } + seq_printf(file, "port INV %10u\n", + rx_invalid); + return 0; +} + +static int mlxsw_core_rx_stats_dbg_open(struct inode *inode, struct file *f) +{ + struct mlxsw_core *mlxsw_core = inode->i_private; + + return single_open(f, mlxsw_core_rx_stats_dbg_read, mlxsw_core); +} + +static const struct file_operations mlxsw_core_rx_stats_dbg_ops = { + .owner = THIS_MODULE, + .open = mlxsw_core_rx_stats_dbg_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, + const char *buf, size_t size) +{ + __be32 *m = (__be32 *) buf; + int i; + int count = size / sizeof(__be32); + + for (i = count - 1; i >= 0; i--) + if (m[i]) + break; + i++; + count = i ? i : 1; + for (i = 0; i < count; i += 4) + dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", + i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), + be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); +} + +int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) +{ + spin_lock(&mlxsw_core_driver_list_lock); + list_add_tail(&mlxsw_driver->list, &mlxsw_core_driver_list); + spin_unlock(&mlxsw_core_driver_list_lock); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_driver_register); + +void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver) +{ + spin_lock(&mlxsw_core_driver_list_lock); + list_del(&mlxsw_driver->list); + spin_unlock(&mlxsw_core_driver_list_lock); +} +EXPORT_SYMBOL(mlxsw_core_driver_unregister); + +static struct mlxsw_driver *__driver_find(const char *kind) +{ + struct mlxsw_driver *mlxsw_driver; + + list_for_each_entry(mlxsw_driver, &mlxsw_core_driver_list, list) { + if (strcmp(mlxsw_driver->kind, kind) == 0) + return mlxsw_driver; + } + return NULL; +} + +static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind) +{ + struct mlxsw_driver *mlxsw_driver; + + spin_lock(&mlxsw_core_driver_list_lock); + mlxsw_driver = __driver_find(kind); + if (!mlxsw_driver) { + spin_unlock(&mlxsw_core_driver_list_lock); + request_module(MLXSW_MODULE_ALIAS_PREFIX "%s", kind); + spin_lock(&mlxsw_core_driver_list_lock); + mlxsw_driver = __driver_find(kind); + } + if (mlxsw_driver) { + if (!try_module_get(mlxsw_driver->owner)) + mlxsw_driver = NULL; + } + + spin_unlock(&mlxsw_core_driver_list_lock); + return mlxsw_driver; +} + +static void mlxsw_core_driver_put(const char *kind) +{ + struct mlxsw_driver *mlxsw_driver; + + spin_lock(&mlxsw_core_driver_list_lock); + mlxsw_driver = __driver_find(kind); + spin_unlock(&mlxsw_core_driver_list_lock); + if (!mlxsw_driver) + return; + module_put(mlxsw_driver->owner); +} + +static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core) +{ + const struct mlxsw_bus_info *bus_info = mlxsw_core->bus_info; + + mlxsw_core->dbg_dir = debugfs_create_dir(bus_info->device_name, + mlxsw_core_dbg_root); + if (!mlxsw_core->dbg_dir) + return -ENOMEM; + debugfs_create_file("rx_stats", S_IRUGO, mlxsw_core->dbg_dir, + mlxsw_core, &mlxsw_core_rx_stats_dbg_ops); + mlxsw_core->dbg.vsd_blob.data = (void *) &bus_info->vsd; + mlxsw_core->dbg.vsd_blob.size = sizeof(bus_info->vsd); + debugfs_create_blob("vsd", S_IRUGO, mlxsw_core->dbg_dir, + &mlxsw_core->dbg.vsd_blob); + mlxsw_core->dbg.psid_blob.data = (void *) &bus_info->psid; + mlxsw_core->dbg.psid_blob.size = sizeof(bus_info->psid); + debugfs_create_blob("psid", S_IRUGO, mlxsw_core->dbg_dir, + &mlxsw_core->dbg.psid_blob); + return 0; +} + +static void mlxsw_core_debugfs_fini(struct mlxsw_core *mlxsw_core) +{ + debugfs_remove_recursive(mlxsw_core->dbg_dir); +} + +int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv) +{ + const char *device_kind = mlxsw_bus_info->device_kind; + struct mlxsw_core *mlxsw_core; + struct mlxsw_driver *mlxsw_driver; + size_t alloc_size; + int err; + + mlxsw_driver = mlxsw_core_driver_get(device_kind); + if (!mlxsw_driver) + return -EINVAL; + alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; + mlxsw_core = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core) { + err = -ENOMEM; + goto err_core_alloc; + } + + INIT_LIST_HEAD(&mlxsw_core->rx_listener_list); + INIT_LIST_HEAD(&mlxsw_core->event_listener_list); + mlxsw_core->driver = mlxsw_driver; + mlxsw_core->bus = mlxsw_bus; + mlxsw_core->bus_priv = bus_priv; + mlxsw_core->bus_info = mlxsw_bus_info; + + mlxsw_core->pcpu_stats = + netdev_alloc_pcpu_stats(struct mlxsw_core_pcpu_stats); + if (!mlxsw_core->pcpu_stats) { + err = -ENOMEM; + goto err_alloc_stats; + } + + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); + if (err) + goto err_bus_init; + + err = mlxsw_emad_init(mlxsw_core); + if (err) + goto err_emad_init; + + err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, + mlxsw_bus_info); + if (err) + goto err_driver_init; + + err = mlxsw_core_debugfs_init(mlxsw_core); + if (err) + goto err_debugfs_init; + + return 0; + +err_debugfs_init: + mlxsw_core->driver->fini(mlxsw_core->driver_priv); +err_driver_init: + mlxsw_emad_fini(mlxsw_core); +err_emad_init: + mlxsw_bus->fini(bus_priv); +err_bus_init: + free_percpu(mlxsw_core->pcpu_stats); +err_alloc_stats: + kfree(mlxsw_core); +err_core_alloc: + mlxsw_core_driver_put(device_kind); + return err; +} +EXPORT_SYMBOL(mlxsw_core_bus_device_register); + +void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) +{ + const char *device_kind = mlxsw_core->bus_info->device_kind; + + mlxsw_core_debugfs_fini(mlxsw_core); + mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_emad_fini(mlxsw_core); + mlxsw_core->bus->fini(mlxsw_core->bus_priv); + free_percpu(mlxsw_core->pcpu_stats); + kfree(mlxsw_core); + mlxsw_core_driver_put(device_kind); +} +EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); + +static struct mlxsw_core *__mlxsw_core_get(void *driver_priv) +{ + return container_of(driver_priv, struct mlxsw_core, driver_priv); +} + +bool mlxsw_core_skb_transmit_busy(void *driver_priv, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); + + return mlxsw_core->bus->skb_transmit_busy(mlxsw_core->bus_priv, + tx_info); +} +EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy); + +int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); + + return mlxsw_core->bus->skb_transmit(mlxsw_core->bus_priv, skb, + tx_info); +} +EXPORT_SYMBOL(mlxsw_core_skb_transmit); + +static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, + const struct mlxsw_rx_listener *rxl_b) +{ + return (rxl_a->func == rxl_b->func && + rxl_a->local_port == rxl_b->local_port && + rxl_a->trap_id == rxl_b->trap_id); +} + +static struct mlxsw_rx_listener_item * +__find_rx_listener_item(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv) +{ + struct mlxsw_rx_listener_item *rxl_item; + + list_for_each_entry(rxl_item, &mlxsw_core->rx_listener_list, list) { + if (__is_rx_listener_equal(&rxl_item->rxl, rxl) && + rxl_item->priv == priv) + return rxl_item; + } + return NULL; +} + +int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv) +{ + struct mlxsw_rx_listener_item *rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv); + if (rxl_item) + return -EEXIST; + rxl_item = kmalloc(sizeof(*rxl_item), GFP_KERNEL); + if (!rxl_item) + return -ENOMEM; + rxl_item->rxl = *rxl; + rxl_item->priv = priv; + + list_add_rcu(&rxl_item->list, &mlxsw_core->rx_listener_list); + return 0; +} +EXPORT_SYMBOL(mlxsw_core_rx_listener_register); + +void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv) +{ + struct mlxsw_rx_listener_item *rxl_item; + + rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv); + if (!rxl_item) + return; + list_del_rcu(&rxl_item->list); + synchronize_rcu(); + kfree(rxl_item); +} +EXPORT_SYMBOL(mlxsw_core_rx_listener_unregister); + +static void mlxsw_core_event_listener_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_event_listener_item *event_listener_item = priv; + struct mlxsw_reg_info reg; + char *payload; + char *op_tlv = mlxsw_emad_op_tlv(skb); + char *reg_tlv = mlxsw_emad_reg_tlv(skb); + + reg.id = mlxsw_emad_op_tlv_register_id_get(op_tlv); + reg.len = (mlxsw_emad_reg_tlv_len_get(reg_tlv) - 1) * sizeof(u32); + payload = mlxsw_emad_reg_payload(op_tlv); + event_listener_item->el.func(®, payload, event_listener_item->priv); + dev_kfree_skb(skb); +} + +static bool __is_event_listener_equal(const struct mlxsw_event_listener *el_a, + const struct mlxsw_event_listener *el_b) +{ + return (el_a->func == el_b->func && + el_a->trap_id == el_b->trap_id); +} + +static struct mlxsw_event_listener_item * +__find_event_listener_item(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv) +{ + struct mlxsw_event_listener_item *el_item; + + list_for_each_entry(el_item, &mlxsw_core->event_listener_list, list) { + if (__is_event_listener_equal(&el_item->el, el) && + el_item->priv == priv) + return el_item; + } + return NULL; +} + +int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv) +{ + int err; + struct mlxsw_event_listener_item *el_item; + const struct mlxsw_rx_listener rxl = { + .func = mlxsw_core_event_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = el->trap_id, + }; + + el_item = __find_event_listener_item(mlxsw_core, el, priv); + if (el_item) + return -EEXIST; + el_item = kmalloc(sizeof(*el_item), GFP_KERNEL); + if (!el_item) + return -ENOMEM; + el_item->el = *el; + el_item->priv = priv; + + err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item); + if (err) + goto err_rx_listener_register; + + /* No reason to save item if we did not manage to register an RX + * listener for it. + */ + list_add_rcu(&el_item->list, &mlxsw_core->event_listener_list); + + return 0; + +err_rx_listener_register: + kfree(el_item); + return err; +} +EXPORT_SYMBOL(mlxsw_core_event_listener_register); + +void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv) +{ + struct mlxsw_event_listener_item *el_item; + const struct mlxsw_rx_listener rxl = { + .func = mlxsw_core_event_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = el->trap_id, + }; + + el_item = __find_event_listener_item(mlxsw_core, el, priv); + if (!el_item) + return; + mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl, el_item); + list_del(&el_item->list); + kfree(el_item); +} +EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); + +static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type) +{ + int err; + char *op_tlv; + struct sk_buff *skb; + struct mlxsw_tx_info tx_info = { + .local_port = MLXSW_PORT_CPU_PORT, + .is_emad = true, + }; + + skb = mlxsw_emad_alloc(mlxsw_core, reg->len); + if (!skb) + return -ENOMEM; + + mlxsw_emad_construct(skb, reg, payload, type, mlxsw_core); + mlxsw_core->driver->txhdr_construct(skb, &tx_info); + + dev_dbg(mlxsw_core->bus_info->dev, "EMAD send (tid=%llx)\n", + mlxsw_core->emad.tid); + mlxsw_core_buf_dump_dbg(mlxsw_core, skb->data, skb->len); + + err = mlxsw_emad_transmit(mlxsw_core, skb, &tx_info); + if (!err) { + op_tlv = mlxsw_emad_op_tlv(mlxsw_core->emad.resp_skb); + memcpy(payload, mlxsw_emad_reg_payload(op_tlv), + reg->len); + + dev_dbg(mlxsw_core->bus_info->dev, "EMAD recv (tid=%llx)\n", + mlxsw_core->emad.tid - 1); + mlxsw_core_buf_dump_dbg(mlxsw_core, + mlxsw_core->emad.resp_skb->data, + mlxsw_core->emad.resp_skb->len); + + dev_kfree_skb(mlxsw_core->emad.resp_skb); + } + + return err; +} + +static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type) +{ + int err, n_retry; + char *in_mbox, *out_mbox, *tmp; + + in_mbox = mlxsw_cmd_mbox_alloc(); + if (!in_mbox) + return -ENOMEM; + + out_mbox = mlxsw_cmd_mbox_alloc(); + if (!out_mbox) { + err = -ENOMEM; + goto free_in_mbox; + } + + mlxsw_emad_pack_op_tlv(in_mbox, reg, type, mlxsw_core); + tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); + mlxsw_emad_pack_reg_tlv(tmp, reg, payload); + + n_retry = 0; +retry: + err = mlxsw_cmd_access_reg(mlxsw_core, in_mbox, out_mbox); + if (!err) { + err = mlxsw_emad_process_status(mlxsw_core, out_mbox); + if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) + goto retry; + } + + if (!err) + memcpy(payload, mlxsw_emad_reg_payload(out_mbox), + reg->len); + + mlxsw_core->emad.tid++; + mlxsw_cmd_mbox_free(out_mbox); +free_in_mbox: + mlxsw_cmd_mbox_free(in_mbox); + return err; +} + +static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type) +{ + u64 cur_tid; + int err; + + if (mutex_lock_interruptible(&mlxsw_core->emad.lock)) { + dev_err(mlxsw_core->bus_info->dev, "Reg access interrupted (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + return -EINTR; + } + + cur_tid = mlxsw_core->emad.tid; + dev_dbg(mlxsw_core->bus_info->dev, "Reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", + cur_tid, reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + /* During initialization EMAD interface is not available to us, + * so we default to command interface. We switch to EMAD interface + * after setting the appropriate traps. + */ + if (!mlxsw_core->emad.use_emad) + err = mlxsw_core_reg_access_cmd(mlxsw_core, reg, + payload, type); + else + err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + payload, type); + + if (err) + dev_err(mlxsw_core->bus_info->dev, "Reg access failed (tid=%llx,reg_id=%x(%s),type=%s)\n", + cur_tid, reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + mutex_unlock(&mlxsw_core->emad.lock); + return err; +} + +int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload) +{ + return mlxsw_core_reg_access(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_QUERY); +} +EXPORT_SYMBOL(mlxsw_reg_query); + +int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload) +{ + return mlxsw_core_reg_access(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE); +} +EXPORT_SYMBOL(mlxsw_reg_write); + +void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, + struct mlxsw_rx_info *rx_info) +{ + struct mlxsw_rx_listener_item *rxl_item; + const struct mlxsw_rx_listener *rxl; + struct mlxsw_core_pcpu_stats *pcpu_stats; + u8 local_port = rx_info->sys_port; + bool found = false; + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n", + __func__, rx_info->sys_port, rx_info->trap_id); + + if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || + (local_port >= MLXSW_PORT_MAX_PORTS)) + goto drop; + + rcu_read_lock(); + list_for_each_entry_rcu(rxl_item, &mlxsw_core->rx_listener_list, list) { + rxl = &rxl_item->rxl; + if ((rxl->local_port == MLXSW_PORT_DONT_CARE || + rxl->local_port == local_port) && + rxl->trap_id == rx_info->trap_id) { + found = true; + break; + } + } + rcu_read_unlock(); + if (!found) + goto drop; + + pcpu_stats = this_cpu_ptr(mlxsw_core->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->port_rx_packets[local_port]++; + pcpu_stats->port_rx_bytes[local_port] += skb->len; + pcpu_stats->trap_rx_packets[rx_info->trap_id]++; + pcpu_stats->trap_rx_bytes[rx_info->trap_id] += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + rxl->func(skb, local_port, rxl_item->priv); + return; + +drop: + if (rx_info->trap_id >= MLXSW_TRAP_ID_MAX) + this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_invalid); + else + this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_dropped[rx_info->trap_id]); + if (local_port >= MLXSW_PORT_MAX_PORTS) + this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_invalid); + else + this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_dropped[local_port]); + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(mlxsw_core_skb_receive); + +int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size) +{ + u8 status; + int err; + + BUG_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32)); + if (!mlxsw_core->bus->cmd_exec) + return -EOPNOTSUPP; + + dev_dbg(mlxsw_core->bus_info->dev, "Cmd exec (opcode=%x(%s),opcode_mod=%x,in_mod=%x)\n", + opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, in_mod); + if (in_mbox) { + dev_dbg(mlxsw_core->bus_info->dev, "Input mailbox:\n"); + mlxsw_core_buf_dump_dbg(mlxsw_core, in_mbox, in_mbox_size); + } + + err = mlxsw_core->bus->cmd_exec(mlxsw_core->bus_priv, opcode, + opcode_mod, in_mod, out_mbox_direct, + in_mbox, in_mbox_size, + out_mbox, out_mbox_size, &status); + + if (err == -EIO && status != MLXSW_CMD_STATUS_OK) { + dev_err(mlxsw_core->bus_info->dev, "Cmd exec failed (opcode=%x(%s),opcode_mod=%x,in_mod=%x,status=%x(%s))\n", + opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, + in_mod, status, mlxsw_cmd_status_str(status)); + } else if (err == -ETIMEDOUT) { + dev_err(mlxsw_core->bus_info->dev, "Cmd exec timed-out (opcode=%x(%s),opcode_mod=%x,in_mod=%x)\n", + opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, + in_mod); + } + + if (!err && out_mbox) { + dev_dbg(mlxsw_core->bus_info->dev, "Output mailbox:\n"); + mlxsw_core_buf_dump_dbg(mlxsw_core, out_mbox, out_mbox_size); + } + return err; +} +EXPORT_SYMBOL(mlxsw_cmd_exec); + +static int __init mlxsw_core_module_init(void) +{ + mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); + if (!mlxsw_core_dbg_root) + return -ENOMEM; + return 0; +} + +static void __exit mlxsw_core_module_exit(void) +{ + debugfs_remove_recursive(mlxsw_core_dbg_root); +} + +module_init(mlxsw_core_module_init); +module_exit(mlxsw_core_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox switch device core driver"); diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/core.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/core.h new file mode 100644 index 000000000..807827350 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -0,0 +1,212 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_CORE_H +#define _MLXSW_CORE_H + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/gfp.h> +#include <linux/types.h> +#include <linux/skbuff.h> + +#include "trap.h" +#include "reg.h" + +#include "cmd.h" + +#define MLXSW_MODULE_ALIAS_PREFIX "mlxsw-driver-" +#define MODULE_MLXSW_DRIVER_ALIAS(kind) \ + MODULE_ALIAS(MLXSW_MODULE_ALIAS_PREFIX kind) + +#define MLXSW_DEVICE_KIND_SWITCHX2 "switchx2" +#define MLXSW_DEVICE_KIND_SPECTRUM "spectrum" + +struct mlxsw_core; +struct mlxsw_driver; +struct mlxsw_bus; +struct mlxsw_bus_info; + +int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); +void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); + +int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv); +void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core); + +struct mlxsw_tx_info { + u8 local_port; + bool is_emad; +}; + +bool mlxsw_core_skb_transmit_busy(void *driver_priv, + const struct mlxsw_tx_info *tx_info); + +int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + +struct mlxsw_rx_listener { + void (*func)(struct sk_buff *skb, u8 local_port, void *priv); + u8 local_port; + u16 trap_id; +}; + +struct mlxsw_event_listener { + void (*func)(const struct mlxsw_reg_info *reg, + char *payload, void *priv); + enum mlxsw_event_trap_id trap_id; +}; + +int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv); +void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_rx_listener *rxl, + void *priv); + +int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv); +void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_event_listener *el, + void *priv); + +int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload); +int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload); + +struct mlxsw_rx_info { + u16 sys_port; + int trap_id; +}; + +void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, + struct mlxsw_rx_info *rx_info); + +#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 + +struct mlxsw_swid_config { + u8 used_type:1, + used_properties:1; + u8 type; + u8 properties; +}; + +struct mlxsw_config_profile { + u16 used_max_vepa_channels:1, + used_max_lag:1, + used_max_port_per_lag:1, + used_max_mid:1, + used_max_pgt:1, + used_max_system_port:1, + used_max_vlan_groups:1, + used_max_regions:1, + used_flood_tables:1, + used_flood_mode:1, + used_max_ib_mc:1, + used_max_pkey:1, + used_ar_sec:1, + used_adaptive_routing_group_cap:1; + u8 max_vepa_channels; + u16 max_lag; + u16 max_port_per_lag; + u16 max_mid; + u16 max_pgt; + u16 max_system_port; + u16 max_vlan_groups; + u16 max_regions; + u8 max_flood_tables; + u8 max_vid_flood_tables; + u8 flood_mode; + u8 max_fid_offset_flood_tables; + u16 fid_offset_flood_table_size; + u8 max_fid_flood_tables; + u16 fid_flood_table_size; + u16 max_ib_mc; + u16 max_pkey; + u8 ar_sec; + u16 adaptive_routing_group_cap; + u8 arn; + struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; +}; + +struct mlxsw_driver { + struct list_head list; + const char *kind; + struct module *owner; + size_t priv_size; + int (*init)(void *driver_priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info); + void (*fini)(void *driver_priv); + void (*txhdr_construct)(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + u8 txhdr_len; + const struct mlxsw_config_profile *profile; +}; + +struct mlxsw_bus { + const char *kind; + int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile); + void (*fini)(void *bus_priv); + bool (*skb_transmit_busy)(void *bus_priv, + const struct mlxsw_tx_info *tx_info); + int (*skb_transmit)(void *bus_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info); + int (*cmd_exec)(void *bus_priv, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size, + u8 *p_status); +}; + +struct mlxsw_bus_info { + const char *device_kind; + const char *device_name; + struct device *dev; + struct { + u16 major; + u16 minor; + u16 subminor; + } fw_rev; + u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; + u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; +}; + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/emad.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/emad.h new file mode 100644 index 000000000..97b6bb5d9 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/emad.h @@ -0,0 +1,127 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/emad.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_EMAD_H +#define _MLXSW_EMAD_H + +#define MLXSW_EMAD_MAX_FRAME_LEN 1518 /* Length in u8 */ +#define MLXSW_EMAD_MAX_RETRY 5 + +/* EMAD Ethernet header */ +#define MLXSW_EMAD_ETH_HDR_LEN 0x10 /* Length in u8 */ +#define MLXSW_EMAD_EH_DMAC "\x01\x02\xc9\x00\x00\x01" +#define MLXSW_EMAD_EH_SMAC "\x00\x02\xc9\x01\x02\x03" +#define MLXSW_EMAD_EH_ETHERTYPE 0x8932 +#define MLXSW_EMAD_EH_MLX_PROTO 0 +#define MLXSW_EMAD_EH_PROTO_VERSION 0 + +/* EMAD TLV Types */ +enum { + MLXSW_EMAD_TLV_TYPE_END, + MLXSW_EMAD_TLV_TYPE_OP, + MLXSW_EMAD_TLV_TYPE_DR, + MLXSW_EMAD_TLV_TYPE_REG, + MLXSW_EMAD_TLV_TYPE_USERDATA, + MLXSW_EMAD_TLV_TYPE_OOBETH, +}; + +/* OP TLV */ +#define MLXSW_EMAD_OP_TLV_LEN 4 /* Length in u32 */ + +enum { + MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS = 1, + MLXSW_EMAD_OP_TLV_CLASS_IPC = 2, +}; + +enum mlxsw_emad_op_tlv_status { + MLXSW_EMAD_OP_TLV_STATUS_SUCCESS, + MLXSW_EMAD_OP_TLV_STATUS_BUSY, + MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV, + MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED, + MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER, + MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE, + MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK, + MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR = 0x70, +}; + +static inline char *mlxsw_emad_op_tlv_status_str(u8 status) +{ + switch (status) { + case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: + return "operation performed"; + case MLXSW_EMAD_OP_TLV_STATUS_BUSY: + return "device is busy"; + case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: + return "version not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: + return "unknown TLV"; + case MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED: + return "register not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED: + return "class not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED: + return "method not supported"; + case MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER: + return "bad parameter"; + case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: + return "resource not available"; + case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: + return "acknowledged. retransmit"; + case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: + return "internal error"; + default: + return "*UNKNOWN*"; + } +} + +enum { + MLXSW_EMAD_OP_TLV_REQUEST, + MLXSW_EMAD_OP_TLV_RESPONSE +}; + +enum { + MLXSW_EMAD_OP_TLV_METHOD_QUERY = 1, + MLXSW_EMAD_OP_TLV_METHOD_WRITE = 2, + MLXSW_EMAD_OP_TLV_METHOD_SEND = 3, + MLXSW_EMAD_OP_TLV_METHOD_EVENT = 5, +}; + +/* END TLV */ +#define MLXSW_EMAD_END_TLV_LEN 1 /* Length in u32 */ + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/item.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/item.h new file mode 100644 index 000000000..a94dbda65 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -0,0 +1,441 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/item.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_ITEM_H +#define _MLXSW_ITEM_H + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/bitops.h> + +struct mlxsw_item { + unsigned short offset; /* bytes in container */ + unsigned short step; /* step in bytes for indexed items */ + unsigned short in_step_offset; /* offset within one step */ + unsigned char shift; /* shift in bits */ + unsigned char element_size; /* size of element in bit array */ + bool no_real_shift; + union { + unsigned char bits; + unsigned short bytes; + } size; + const char *name; +}; + +static inline unsigned int +__mlxsw_item_offset(struct mlxsw_item *item, unsigned short index, + size_t typesize) +{ + BUG_ON(index && !item->step); + if (item->offset % typesize != 0 || + item->step % typesize != 0 || + item->in_step_offset % typesize != 0) { + pr_err("mlxsw: item bug (name=%s,offset=%x,step=%x,in_step_offset=%x,typesize=%zx)\n", + item->name, item->offset, item->step, + item->in_step_offset, typesize); + BUG(); + } + + return ((item->offset + item->step * index + item->in_step_offset) / + typesize); +} + +static inline u16 __mlxsw_item_get16(char *buf, struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u16)); + __be16 *b = (__be16 *) buf; + u16 tmp; + + tmp = be16_to_cpu(b[offset]); + tmp >>= item->shift; + tmp &= GENMASK(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set16(char *buf, struct mlxsw_item *item, + unsigned short index, u16 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, + sizeof(u16)); + __be16 *b = (__be16 *) buf; + u16 mask = GENMASK(item->size.bits - 1, 0) << item->shift; + u16 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = be16_to_cpu(b[offset]); + tmp &= ~mask; + tmp |= val; + b[offset] = cpu_to_be16(tmp); +} + +static inline u32 __mlxsw_item_get32(char *buf, struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u32)); + __be32 *b = (__be32 *) buf; + u32 tmp; + + tmp = be32_to_cpu(b[offset]); + tmp >>= item->shift; + tmp &= GENMASK(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set32(char *buf, struct mlxsw_item *item, + unsigned short index, u32 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, + sizeof(u32)); + __be32 *b = (__be32 *) buf; + u32 mask = GENMASK(item->size.bits - 1, 0) << item->shift; + u32 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = be32_to_cpu(b[offset]); + tmp &= ~mask; + tmp |= val; + b[offset] = cpu_to_be32(tmp); +} + +static inline u64 __mlxsw_item_get64(char *buf, struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64)); + __be64 *b = (__be64 *) buf; + u64 tmp; + + tmp = be64_to_cpu(b[offset]); + tmp >>= item->shift; + tmp &= GENMASK_ULL(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set64(char *buf, struct mlxsw_item *item, + unsigned short index, u64 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64)); + __be64 *b = (__be64 *) buf; + u64 mask = GENMASK_ULL(item->size.bits - 1, 0) << item->shift; + u64 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = be64_to_cpu(b[offset]); + tmp &= ~mask; + tmp |= val; + b[offset] = cpu_to_be64(tmp); +} + +static inline void __mlxsw_item_memcpy_from(char *buf, char *dst, + struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); + + memcpy(dst, &buf[offset], item->size.bytes); +} + +static inline void __mlxsw_item_memcpy_to(char *buf, const char *src, + struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); + + memcpy(&buf[offset], src, item->size.bytes); +} + +static inline u16 +__mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift) +{ + u16 max_index, be_index; + u16 offset; /* byte offset inside the array */ + u8 in_byte_index; + + BUG_ON(index && !item->element_size); + if (item->offset % sizeof(u32) != 0 || + BITS_PER_BYTE % item->element_size != 0) { + pr_err("mlxsw: item bug (name=%s,offset=%x,element_size=%x)\n", + item->name, item->offset, item->element_size); + BUG(); + } + + max_index = (item->size.bytes << 3) / item->element_size - 1; + be_index = max_index - index; + offset = be_index * item->element_size >> 3; + in_byte_index = index % (BITS_PER_BYTE / item->element_size); + *shift = in_byte_index * item->element_size; + + return item->offset + offset; +} + +static inline u8 __mlxsw_item_bit_array_get(char *buf, struct mlxsw_item *item, + u16 index) +{ + u8 shift, tmp; + u16 offset = __mlxsw_item_bit_array_offset(item, index, &shift); + + tmp = buf[offset]; + tmp >>= shift; + tmp &= GENMASK(item->element_size - 1, 0); + return tmp; +} + +static inline void __mlxsw_item_bit_array_set(char *buf, struct mlxsw_item *item, + u16 index, u8 val) +{ + u8 shift, tmp; + u16 offset = __mlxsw_item_bit_array_offset(item, index, &shift); + u8 mask = GENMASK(item->element_size - 1, 0) << shift; + + val <<= shift; + val &= mask; + tmp = buf[offset]; + tmp &= ~mask; + tmp |= val; + buf[offset] = tmp; +} + +#define __ITEM_NAME(_type, _cname, _iname) \ + mlxsw_##_type##_##_cname##_##_iname##_item + +/* _type: cmd_mbox, reg, etc. + * _cname: containter name (e.g. command name, register name) + * _iname: item name within the container + */ + +#define MLXSW_ITEM16(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf) \ +{ \ + return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 val)\ +{ \ + __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM16_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \ + _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u16 \ +mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index) \ +{ \ + return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u16 val) \ +{ \ + __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM32(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf) \ +{ \ + return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u32 val)\ +{ \ + __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM32_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \ + _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u32 \ +mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index) \ +{ \ + return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u32 val) \ +{ \ + __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM64(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf) \ +{ \ + return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u64 val)\ +{ \ + __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM64_INDEXED(_type, _cname, _iname, _offset, _shift, \ + _sizebits, _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u64 \ +mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index) \ +{ \ + return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u64 val) \ +{ \ + __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + +#define MLXSW_ITEM_BUF(_type, _cname, _iname, _offset, _sizebytes) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .size = {.bytes = _sizebytes,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(char *buf, char *dst) \ +{ \ + __mlxsw_item_memcpy_from(buf, dst, \ + &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, const char *src) \ +{ \ + __mlxsw_item_memcpy_to(buf, src, \ + &__ITEM_NAME(_type, _cname, _iname), 0); \ +} + +#define MLXSW_ITEM_BUF_INDEXED(_type, _cname, _iname, _offset, _sizebytes, \ + _step, _instepoffset) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .size = {.bytes = _sizebytes,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(char *buf, \ + unsigned short index, \ + char *dst) \ +{ \ + __mlxsw_item_memcpy_from(buf, dst, \ + &__ITEM_NAME(_type, _cname, _iname), index); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, \ + unsigned short index, \ + const char *src) \ +{ \ + __mlxsw_item_memcpy_to(buf, src, \ + &__ITEM_NAME(_type, _cname, _iname), index); \ +} + +#define MLXSW_ITEM_BIT_ARRAY(_type, _cname, _iname, _offset, _sizebytes, \ + _element_size) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .element_size = _element_size, \ + .size = {.bytes = _sizebytes,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u8 \ +mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, u16 index) \ +{ \ + return __mlxsw_item_bit_array_get(buf, \ + &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 index, u8 val) \ +{ \ + return __mlxsw_item_bit_array_set(buf, \ + &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} \ + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/pci.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/pci.c new file mode 100644 index 000000000..de69e719d --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -0,0 +1,1847 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/pci.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/export.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/wait.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/log2.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/string.h> + +#include "pci.h" +#include "core.h" +#include "cmd.h" +#include "port.h" + +static const char mlxsw_pci_driver_name[] = "mlxsw_pci"; + +static const struct pci_device_id mlxsw_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0}, + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, + {0, } +}; + +static struct dentry *mlxsw_pci_dbg_root; + +static const char *mlxsw_pci_device_kind_get(const struct pci_device_id *id) +{ + switch (id->device) { + case PCI_DEVICE_ID_MELLANOX_SWITCHX2: + return MLXSW_DEVICE_KIND_SWITCHX2; + case PCI_DEVICE_ID_MELLANOX_SPECTRUM: + return MLXSW_DEVICE_KIND_SPECTRUM; + default: + BUG(); + } +} + +#define mlxsw_pci_write32(mlxsw_pci, reg, val) \ + iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) +#define mlxsw_pci_read32(mlxsw_pci, reg) \ + ioread32be((mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) + +enum mlxsw_pci_queue_type { + MLXSW_PCI_QUEUE_TYPE_SDQ, + MLXSW_PCI_QUEUE_TYPE_RDQ, + MLXSW_PCI_QUEUE_TYPE_CQ, + MLXSW_PCI_QUEUE_TYPE_EQ, +}; + +static const char *mlxsw_pci_queue_type_str(enum mlxsw_pci_queue_type q_type) +{ + switch (q_type) { + case MLXSW_PCI_QUEUE_TYPE_SDQ: + return "sdq"; + case MLXSW_PCI_QUEUE_TYPE_RDQ: + return "rdq"; + case MLXSW_PCI_QUEUE_TYPE_CQ: + return "cq"; + case MLXSW_PCI_QUEUE_TYPE_EQ: + return "eq"; + } + BUG(); +} + +#define MLXSW_PCI_QUEUE_TYPE_COUNT 4 + +static const u16 mlxsw_pci_doorbell_type_offset[] = { + MLXSW_PCI_DOORBELL_SDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_SDQ */ + MLXSW_PCI_DOORBELL_RDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_RDQ */ + MLXSW_PCI_DOORBELL_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */ + MLXSW_PCI_DOORBELL_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */ +}; + +static const u16 mlxsw_pci_doorbell_arm_type_offset[] = { + 0, /* unused */ + 0, /* unused */ + MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */ + MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */ +}; + +struct mlxsw_pci_mem_item { + char *buf; + dma_addr_t mapaddr; + size_t size; +}; + +struct mlxsw_pci_queue_elem_info { + char *elem; /* pointer to actual dma mapped element mem chunk */ + union { + struct { + struct sk_buff *skb; + } sdq; + struct { + struct sk_buff *skb; + } rdq; + } u; +}; + +struct mlxsw_pci_queue { + spinlock_t lock; /* for queue accesses */ + struct mlxsw_pci_mem_item mem_item; + struct mlxsw_pci_queue_elem_info *elem_info; + u16 producer_counter; + u16 consumer_counter; + u16 count; /* number of elements in queue */ + u8 num; /* queue number */ + u8 elem_size; /* size of one element */ + enum mlxsw_pci_queue_type type; + struct tasklet_struct tasklet; /* queue processing tasklet */ + struct mlxsw_pci *pci; + union { + struct { + u32 comp_sdq_count; + u32 comp_rdq_count; + } cq; + struct { + u32 ev_cmd_count; + u32 ev_comp_count; + u32 ev_other_count; + } eq; + } u; +}; + +struct mlxsw_pci_queue_type_group { + struct mlxsw_pci_queue *q; + u8 count; /* number of queues in group */ +}; + +struct mlxsw_pci { + struct pci_dev *pdev; + u8 __iomem *hw_addr; + struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; + u32 doorbell_offset; + struct msix_entry msix_entry; + struct mlxsw_core *core; + struct { + struct mlxsw_pci_mem_item *items; + unsigned int count; + } fw_area; + struct { + struct mlxsw_pci_mem_item out_mbox; + struct mlxsw_pci_mem_item in_mbox; + struct mutex lock; /* Lock access to command registers */ + bool nopoll; + wait_queue_head_t wait; + bool wait_done; + struct { + u8 status; + u64 out_param; + } comp; + } cmd; + struct mlxsw_bus_info bus_info; + struct dentry *dbg_dir; +}; + +static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) +{ + tasklet_schedule(&q->tasklet); +} + +static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, + size_t elem_size, int elem_index) +{ + return q->mem_item.buf + (elem_size * elem_index); +} + +static struct mlxsw_pci_queue_elem_info * +mlxsw_pci_queue_elem_info_get(struct mlxsw_pci_queue *q, int elem_index) +{ + return &q->elem_info[elem_index]; +} + +static struct mlxsw_pci_queue_elem_info * +mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q) +{ + int index = q->producer_counter & (q->count - 1); + + if ((q->producer_counter - q->consumer_counter) == q->count) + return NULL; + return mlxsw_pci_queue_elem_info_get(q, index); +} + +static struct mlxsw_pci_queue_elem_info * +mlxsw_pci_queue_elem_info_consumer_get(struct mlxsw_pci_queue *q) +{ + int index = q->consumer_counter & (q->count - 1); + + return mlxsw_pci_queue_elem_info_get(q, index); +} + +static char *mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, int elem_index) +{ + return mlxsw_pci_queue_elem_info_get(q, elem_index)->elem; +} + +static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit) +{ + return owner_bit != !!(q->consumer_counter & q->count); +} + +static char *mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q, + u32 (*get_elem_owner_func)(char *)) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + char *elem; + bool owner_bit; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + elem = elem_info->elem; + owner_bit = get_elem_owner_func(elem); + if (mlxsw_pci_elem_hw_owned(q, owner_bit)) + return NULL; + q->consumer_counter++; + rmb(); /* make sure we read owned bit before the rest of elem */ + return elem; +} + +static struct mlxsw_pci_queue_type_group * +mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci, + enum mlxsw_pci_queue_type q_type) +{ + return &mlxsw_pci->queues[q_type]; +} + +static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci, + enum mlxsw_pci_queue_type q_type) +{ + struct mlxsw_pci_queue_type_group *queue_group; + + queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_type); + return queue_group->count; +} + +static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci) +{ + return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ); +} + +static u8 mlxsw_pci_rdq_count(struct mlxsw_pci *mlxsw_pci) +{ + return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_RDQ); +} + +static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci) +{ + return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ); +} + +static u8 mlxsw_pci_eq_count(struct mlxsw_pci *mlxsw_pci) +{ + return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ); +} + +static struct mlxsw_pci_queue * +__mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci, + enum mlxsw_pci_queue_type q_type, u8 q_num) +{ + return &mlxsw_pci->queues[q_type].q[q_num]; +} + +static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, + MLXSW_PCI_QUEUE_TYPE_SDQ, q_num); +} + +static struct mlxsw_pci_queue *mlxsw_pci_rdq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, + MLXSW_PCI_QUEUE_TYPE_RDQ, q_num); +} + +static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ, q_num); +} + +static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci, + u8 q_num) +{ + return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, q_num); +} + +static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 val) +{ + mlxsw_pci_write32(mlxsw_pci, + DOORBELL(mlxsw_pci->doorbell_offset, + mlxsw_pci_doorbell_type_offset[q->type], + q->num), val); +} + +static void __mlxsw_pci_queue_doorbell_arm_set(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 val) +{ + mlxsw_pci_write32(mlxsw_pci, + DOORBELL(mlxsw_pci->doorbell_offset, + mlxsw_pci_doorbell_arm_type_offset[q->type], + q->num), val); +} + +static void mlxsw_pci_queue_doorbell_producer_ring(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + wmb(); /* ensure all writes are done before we ring a bell */ + __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, q->producer_counter); +} + +static void mlxsw_pci_queue_doorbell_consumer_ring(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + wmb(); /* ensure all writes are done before we ring a bell */ + __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, + q->consumer_counter + q->count); +} + +static void +mlxsw_pci_queue_doorbell_arm_consumer_ring(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + wmb(); /* ensure all writes are done before we ring a bell */ + __mlxsw_pci_queue_doorbell_arm_set(mlxsw_pci, q, q->consumer_counter); +} + +static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q, + int page_index) +{ + return q->mem_item.mapaddr + MLXSW_PCI_PAGE_SIZE * page_index; +} + +static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->producer_counter = 0; + q->consumer_counter = 0; + + /* Set CQ of same number of this SDQ. */ + mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); + mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 7); + mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr); + } + + err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num); +} + +static int mlxsw_pci_sdq_dbg_read(struct seq_file *file, void *data) +{ + struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); + struct mlxsw_pci_queue *q; + int i; + static const char hdr[] = + "NUM PROD_COUNT CONS_COUNT COUNT\n"; + + seq_printf(file, hdr); + for (i = 0; i < mlxsw_pci_sdq_count(mlxsw_pci); i++) { + q = mlxsw_pci_sdq_get(mlxsw_pci, i); + spin_lock_bh(&q->lock); + seq_printf(file, "%3d %10d %10d %5d\n", + i, q->producer_counter, q->consumer_counter, + q->count); + spin_unlock_bh(&q->lock); + } + return 0; +} + +static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe, + int index, char *frag_data, size_t frag_len, + int direction) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + dma_addr_t mapaddr; + + mapaddr = pci_map_single(pdev, frag_data, frag_len, direction); + if (unlikely(pci_dma_mapping_error(pdev, mapaddr))) { + dev_err_ratelimited(&pdev->dev, "failed to dma map tx frag\n"); + return -EIO; + } + mlxsw_pci_wqe_address_set(wqe, index, mapaddr); + mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len); + return 0; +} + +static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe, + int index, int direction) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + size_t frag_len = mlxsw_pci_wqe_byte_count_get(wqe, index); + dma_addr_t mapaddr = mlxsw_pci_wqe_address_get(wqe, index); + + if (!frag_len) + return; + pci_unmap_single(pdev, mapaddr, frag_len, direction); +} + +static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue_elem_info *elem_info) +{ + size_t buf_len = MLXSW_PORT_MAX_MTU; + char *wqe = elem_info->elem; + struct sk_buff *skb; + int err; + + elem_info->u.rdq.skb = NULL; + skb = netdev_alloc_skb_ip_align(NULL, buf_len); + if (!skb) + return -ENOMEM; + + /* Assume that wqe was previously zeroed. */ + + err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, + buf_len, DMA_FROM_DEVICE); + if (err) + goto err_frag_map; + + elem_info->u.rdq.skb = skb; + return 0; + +err_frag_map: + dev_kfree_skb_any(skb); + return err; +} + +static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue_elem_info *elem_info) +{ + struct sk_buff *skb; + char *wqe; + + skb = elem_info->u.rdq.skb; + wqe = elem_info->elem; + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); +} + +static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + u8 sdq_count = mlxsw_pci_sdq_count(mlxsw_pci); + int i; + int err; + + q->producer_counter = 0; + q->consumer_counter = 0; + + /* Set CQ of same number of this RDQ with base + * above SDQ count as the lower ones are assigned to SDQs. + */ + mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, sdq_count + q->num); + mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr); + } + + err = mlxsw_cmd_sw2hw_rdq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + + for (i = 0; i < q->count; i++) { + elem_info = mlxsw_pci_queue_elem_info_producer_get(q); + BUG_ON(!elem_info); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) + goto rollback; + /* Everything is set up, ring doorbell to pass elem to HW */ + q->producer_counter++; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + } + + return 0; + +rollback: + for (i--; i >= 0; i--) { + elem_info = mlxsw_pci_queue_elem_info_get(q, i); + mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info); + } + mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num); + + return err; +} + +static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + int i; + + mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num); + for (i = 0; i < q->count; i++) { + elem_info = mlxsw_pci_queue_elem_info_get(q, i); + mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info); + } +} + +static int mlxsw_pci_rdq_dbg_read(struct seq_file *file, void *data) +{ + struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); + struct mlxsw_pci_queue *q; + int i; + static const char hdr[] = + "NUM PROD_COUNT CONS_COUNT COUNT\n"; + + seq_printf(file, hdr); + for (i = 0; i < mlxsw_pci_rdq_count(mlxsw_pci); i++) { + q = mlxsw_pci_rdq_get(mlxsw_pci, i); + spin_lock_bh(&q->lock); + seq_printf(file, "%3d %10d %10d %5d\n", + i, q->producer_counter, q->consumer_counter, + q->count); + spin_unlock_bh(&q->lock); + } + return 0; +} + +static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->consumer_counter = 0; + + for (i = 0; i < q->count; i++) { + char *elem = mlxsw_pci_queue_elem_get(q, i); + + mlxsw_pci_cqe_owner_set(elem, 1); + } + + mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */ + mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); + mlxsw_cmd_mbox_sw2hw_cq_oi_set(mbox, 0); + mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); + mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr); + } + err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); +} + +static int mlxsw_pci_cq_dbg_read(struct seq_file *file, void *data) +{ + struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); + + struct mlxsw_pci_queue *q; + int i; + static const char hdr[] = + "NUM CONS_INDEX SDQ_COUNT RDQ_COUNT COUNT\n"; + + seq_printf(file, hdr); + for (i = 0; i < mlxsw_pci_cq_count(mlxsw_pci); i++) { + q = mlxsw_pci_cq_get(mlxsw_pci, i); + spin_lock_bh(&q->lock); + seq_printf(file, "%3d %10d %10d %10d %5d\n", + i, q->consumer_counter, q->u.cq.comp_sdq_count, + q->u.cq.comp_rdq_count, q->count); + spin_unlock_bh(&q->lock); + } + return 0; +} + +static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 consumer_counter_limit, + char *cqe) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + struct mlxsw_pci_queue_elem_info *elem_info; + char *wqe; + struct sk_buff *skb; + int i; + + spin_lock(&q->lock); + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + skb = elem_info->u.sdq.skb; + wqe = elem_info->elem; + for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + elem_info->u.sdq.skb = NULL; + + if (q->consumer_counter++ != consumer_counter_limit) + dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in SDQ\n"); + spin_unlock(&q->lock); +} + +static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q, + u16 consumer_counter_limit, + char *cqe) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + struct mlxsw_pci_queue_elem_info *elem_info; + char *wqe; + struct sk_buff *skb; + struct mlxsw_rx_info rx_info; + u16 byte_count; + int err; + + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + skb = elem_info->u.sdq.skb; + if (!skb) + return; + wqe = elem_info->elem; + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + + if (q->consumer_counter++ != consumer_counter_limit) + dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); + + /* We do not support lag now */ + if (mlxsw_pci_cqe_lag_get(cqe)) + goto drop; + + rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); + + byte_count = mlxsw_pci_cqe_byte_count_get(cqe); + if (mlxsw_pci_cqe_crc_get(cqe)) + byte_count -= ETH_FCS_LEN; + skb_put(skb, byte_count); + mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); + +put_new_skb: + memset(wqe, 0, q->elem_size); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) + dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); + /* Everything is set up, ring doorbell to pass elem to HW */ + q->producer_counter++; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + return; + +drop: + dev_kfree_skb_any(skb); + goto put_new_skb; +} + +static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) +{ + return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_cqe_owner_get); +} + +static void mlxsw_pci_cq_tasklet(unsigned long data) +{ + struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data; + struct mlxsw_pci *mlxsw_pci = q->pci; + char *cqe; + int items = 0; + int credits = q->count >> 1; + + while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { + u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(cqe); + + if (sendq) { + struct mlxsw_pci_queue *sdq; + + sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, + wqe_counter, cqe); + q->u.cq.comp_sdq_count++; + } else { + struct mlxsw_pci_queue *rdq; + + rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, + wqe_counter, cqe); + q->u.cq.comp_rdq_count++; + } + if (++items == credits) + break; + } + if (items) { + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + } +} + +static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->consumer_counter = 0; + + for (i = 0; i < q->count; i++) { + char *elem = mlxsw_pci_queue_elem_get(q, i); + + mlxsw_pci_eqe_owner_set(elem, 1); + } + + mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */ + mlxsw_cmd_mbox_sw2hw_eq_oi_set(mbox, 0); + mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */ + mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count)); + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr); + } + err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num); +} + +static int mlxsw_pci_eq_dbg_read(struct seq_file *file, void *data) +{ + struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); + struct mlxsw_pci_queue *q; + int i; + static const char hdr[] = + "NUM CONS_COUNT EV_CMD EV_COMP EV_OTHER COUNT\n"; + + seq_printf(file, hdr); + for (i = 0; i < mlxsw_pci_eq_count(mlxsw_pci); i++) { + q = mlxsw_pci_eq_get(mlxsw_pci, i); + spin_lock_bh(&q->lock); + seq_printf(file, "%3d %10d %10d %10d %10d %5d\n", + i, q->consumer_counter, q->u.eq.ev_cmd_count, + q->u.eq.ev_comp_count, q->u.eq.ev_other_count, + q->count); + spin_unlock_bh(&q->lock); + } + return 0; +} + +static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) +{ + mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe); + mlxsw_pci->cmd.comp.out_param = + ((u64) mlxsw_pci_eqe_cmd_out_param_h_get(eqe)) << 32 | + mlxsw_pci_eqe_cmd_out_param_l_get(eqe); + mlxsw_pci->cmd.wait_done = true; + wake_up(&mlxsw_pci->cmd.wait); +} + +static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) +{ + return mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_eqe_owner_get); +} + +static void mlxsw_pci_eq_tasklet(unsigned long data) +{ + struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data; + struct mlxsw_pci *mlxsw_pci = q->pci; + u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci); + unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; + char *eqe; + u8 cqn; + bool cq_handle = false; + int items = 0; + int credits = q->count >> 1; + + memset(&active_cqns, 0, sizeof(active_cqns)); + + while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) { + u8 event_type = mlxsw_pci_eqe_event_type_get(eqe); + + switch (event_type) { + case MLXSW_PCI_EQE_EVENT_TYPE_CMD: + mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); + q->u.eq.ev_cmd_count++; + break; + case MLXSW_PCI_EQE_EVENT_TYPE_COMP: + cqn = mlxsw_pci_eqe_cqn_get(eqe); + set_bit(cqn, active_cqns); + cq_handle = true; + q->u.eq.ev_comp_count++; + break; + default: + q->u.eq.ev_other_count++; + } + if (++items == credits) + break; + } + if (items) { + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + } + + if (!cq_handle) + return; + for_each_set_bit(cqn, active_cqns, cq_count) { + q = mlxsw_pci_cq_get(mlxsw_pci, cqn); + mlxsw_pci_queue_tasklet_schedule(q); + } +} + +struct mlxsw_pci_queue_ops { + const char *name; + enum mlxsw_pci_queue_type type; + int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q); + void (*fini)(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q); + void (*tasklet)(unsigned long data); + int (*dbg_read)(struct seq_file *s, void *data); + u16 elem_count; + u8 elem_size; +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_SDQ, + .init = mlxsw_pci_sdq_init, + .fini = mlxsw_pci_sdq_fini, + .dbg_read = mlxsw_pci_sdq_dbg_read, + .elem_count = MLXSW_PCI_WQE_COUNT, + .elem_size = MLXSW_PCI_WQE_SIZE, +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_RDQ, + .init = mlxsw_pci_rdq_init, + .fini = mlxsw_pci_rdq_fini, + .dbg_read = mlxsw_pci_rdq_dbg_read, + .elem_count = MLXSW_PCI_WQE_COUNT, + .elem_size = MLXSW_PCI_WQE_SIZE +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_CQ, + .init = mlxsw_pci_cq_init, + .fini = mlxsw_pci_cq_fini, + .tasklet = mlxsw_pci_cq_tasklet, + .dbg_read = mlxsw_pci_cq_dbg_read, + .elem_count = MLXSW_PCI_CQE_COUNT, + .elem_size = MLXSW_PCI_CQE_SIZE +}; + +static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = { + .type = MLXSW_PCI_QUEUE_TYPE_EQ, + .init = mlxsw_pci_eq_init, + .fini = mlxsw_pci_eq_fini, + .tasklet = mlxsw_pci_eq_tasklet, + .dbg_read = mlxsw_pci_eq_dbg_read, + .elem_count = MLXSW_PCI_EQE_COUNT, + .elem_size = MLXSW_PCI_EQE_SIZE +}; + +static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + const struct mlxsw_pci_queue_ops *q_ops, + struct mlxsw_pci_queue *q, u8 q_num) +{ + struct mlxsw_pci_mem_item *mem_item = &q->mem_item; + int i; + int err; + + spin_lock_init(&q->lock); + q->num = q_num; + q->count = q_ops->elem_count; + q->elem_size = q_ops->elem_size; + q->type = q_ops->type; + q->pci = mlxsw_pci; + + if (q_ops->tasklet) + tasklet_init(&q->tasklet, q_ops->tasklet, (unsigned long) q); + + mem_item->size = MLXSW_PCI_AQ_SIZE; + mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev, + mem_item->size, + &mem_item->mapaddr); + if (!mem_item->buf) + return -ENOMEM; + memset(mem_item->buf, 0, mem_item->size); + + q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL); + if (!q->elem_info) { + err = -ENOMEM; + goto err_elem_info_alloc; + } + + /* Initialize dma mapped elements info elem_info for + * future easy access. + */ + for (i = 0; i < q->count; i++) { + struct mlxsw_pci_queue_elem_info *elem_info; + + elem_info = mlxsw_pci_queue_elem_info_get(q, i); + elem_info->elem = + __mlxsw_pci_queue_elem_get(q, q_ops->elem_size, i); + } + + mlxsw_cmd_mbox_zero(mbox); + err = q_ops->init(mlxsw_pci, mbox, q); + if (err) + goto err_q_ops_init; + return 0; + +err_q_ops_init: + kfree(q->elem_info); +err_elem_info_alloc: + pci_free_consistent(mlxsw_pci->pdev, mem_item->size, + mem_item->buf, mem_item->mapaddr); + return err; +} + +static void mlxsw_pci_queue_fini(struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_pci_queue_ops *q_ops, + struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_mem_item *mem_item = &q->mem_item; + + q_ops->fini(mlxsw_pci, q); + kfree(q->elem_info); + pci_free_consistent(mlxsw_pci->pdev, mem_item->size, + mem_item->buf, mem_item->mapaddr); +} + +static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + const struct mlxsw_pci_queue_ops *q_ops, + u8 num_qs) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + struct mlxsw_pci_queue_type_group *queue_group; + char tmp[16]; + int i; + int err; + + queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type); + queue_group->q = kcalloc(num_qs, sizeof(*queue_group->q), GFP_KERNEL); + if (!queue_group->q) + return -ENOMEM; + + for (i = 0; i < num_qs; i++) { + err = mlxsw_pci_queue_init(mlxsw_pci, mbox, q_ops, + &queue_group->q[i], i); + if (err) + goto err_queue_init; + } + queue_group->count = num_qs; + + sprintf(tmp, "%s_stats", mlxsw_pci_queue_type_str(q_ops->type)); + debugfs_create_devm_seqfile(&pdev->dev, tmp, mlxsw_pci->dbg_dir, + q_ops->dbg_read); + + return 0; + +err_queue_init: + for (i--; i >= 0; i--) + mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]); + kfree(queue_group->q); + return err; +} + +static void mlxsw_pci_queue_group_fini(struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_pci_queue_ops *q_ops) +{ + struct mlxsw_pci_queue_type_group *queue_group; + int i; + + queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type); + for (i = 0; i < queue_group->count; i++) + mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]); + kfree(queue_group->q); +} + +static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + u8 num_sdqs; + u8 sdq_log2sz; + u8 num_rdqs; + u8 rdq_log2sz; + u8 num_cqs; + u8 cq_log2sz; + u8 num_eqs; + u8 eq_log2sz; + int err; + + mlxsw_cmd_mbox_zero(mbox); + err = mlxsw_cmd_query_aq_cap(mlxsw_pci->core, mbox); + if (err) + return err; + + num_sdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_sdqs_get(mbox); + sdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_sdq_sz_get(mbox); + num_rdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_rdqs_get(mbox); + rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox); + num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox); + cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox); + num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox); + eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox); + + if (num_sdqs + num_rdqs > num_cqs || + num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) { + dev_err(&pdev->dev, "Unsupported number of queues\n"); + return -EINVAL; + } + + if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) || + (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) || + (1 << cq_log2sz != MLXSW_PCI_CQE_COUNT) || + (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) { + dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n"); + return -EINVAL; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops, + num_eqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize event queues\n"); + return err; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_cq_ops, + num_cqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize completion queues\n"); + goto err_cqs_init; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_sdq_ops, + num_sdqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize send descriptor queues\n"); + goto err_sdqs_init; + } + + err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_rdq_ops, + num_rdqs); + if (err) { + dev_err(&pdev->dev, "Failed to initialize receive descriptor queues\n"); + goto err_rdqs_init; + } + + /* We have to poll in command interface until queues are initialized */ + mlxsw_pci->cmd.nopoll = true; + return 0; + +err_rdqs_init: + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops); +err_sdqs_init: + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops); +err_cqs_init: + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops); + return err; +} + +static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci) +{ + mlxsw_pci->cmd.nopoll = false; + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops); + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops); + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops); + mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops); +} + +static void +mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, + char *mbox, int index, + const struct mlxsw_swid_config *swid) +{ + u8 mask = 0; + + if (swid->used_type) { + mlxsw_cmd_mbox_config_profile_swid_config_type_set( + mbox, index, swid->type); + mask |= 1; + } + if (swid->used_properties) { + mlxsw_cmd_mbox_config_profile_swid_config_properties_set( + mbox, index, swid->properties); + mask |= 2; + } + mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask); +} + +static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, + const struct mlxsw_config_profile *profile) +{ + int i; + + mlxsw_cmd_mbox_zero(mbox); + + if (profile->used_max_vepa_channels) { + mlxsw_cmd_mbox_config_profile_set_max_vepa_channels_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_vepa_channels_set( + mbox, profile->max_vepa_channels); + } + if (profile->used_max_lag) { + mlxsw_cmd_mbox_config_profile_set_max_lag_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_lag_set( + mbox, profile->max_lag); + } + if (profile->used_max_port_per_lag) { + mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_port_per_lag_set( + mbox, profile->max_port_per_lag); + } + if (profile->used_max_mid) { + mlxsw_cmd_mbox_config_profile_set_max_mid_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_mid_set( + mbox, profile->max_mid); + } + if (profile->used_max_pgt) { + mlxsw_cmd_mbox_config_profile_set_max_pgt_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_pgt_set( + mbox, profile->max_pgt); + } + if (profile->used_max_system_port) { + mlxsw_cmd_mbox_config_profile_set_max_system_port_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_system_port_set( + mbox, profile->max_system_port); + } + if (profile->used_max_vlan_groups) { + mlxsw_cmd_mbox_config_profile_set_max_vlan_groups_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_vlan_groups_set( + mbox, profile->max_vlan_groups); + } + if (profile->used_max_regions) { + mlxsw_cmd_mbox_config_profile_set_max_regions_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_regions_set( + mbox, profile->max_regions); + } + if (profile->used_flood_tables) { + mlxsw_cmd_mbox_config_profile_set_flood_tables_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_flood_tables_set( + mbox, profile->max_flood_tables); + mlxsw_cmd_mbox_config_profile_max_vid_flood_tables_set( + mbox, profile->max_vid_flood_tables); + mlxsw_cmd_mbox_config_profile_max_fid_offset_flood_tables_set( + mbox, profile->max_fid_offset_flood_tables); + mlxsw_cmd_mbox_config_profile_fid_offset_flood_table_size_set( + mbox, profile->fid_offset_flood_table_size); + mlxsw_cmd_mbox_config_profile_max_fid_flood_tables_set( + mbox, profile->max_fid_flood_tables); + mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set( + mbox, profile->fid_flood_table_size); + } + if (profile->used_flood_mode) { + mlxsw_cmd_mbox_config_profile_set_flood_mode_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_flood_mode_set( + mbox, profile->flood_mode); + } + if (profile->used_max_ib_mc) { + mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_ib_mc_set( + mbox, profile->max_ib_mc); + } + if (profile->used_max_pkey) { + mlxsw_cmd_mbox_config_profile_set_max_pkey_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_max_pkey_set( + mbox, profile->max_pkey); + } + if (profile->used_ar_sec) { + mlxsw_cmd_mbox_config_profile_set_ar_sec_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_ar_sec_set( + mbox, profile->ar_sec); + } + if (profile->used_adaptive_routing_group_cap) { + mlxsw_cmd_mbox_config_profile_set_adaptive_routing_group_cap_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( + mbox, profile->adaptive_routing_group_cap); + } + + for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) + mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, + &profile->swid_config[i]); + + return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox); +} + +static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox) +{ + struct mlxsw_bus_info *bus_info = &mlxsw_pci->bus_info; + int err; + + mlxsw_cmd_mbox_zero(mbox); + err = mlxsw_cmd_boardinfo(mlxsw_pci->core, mbox); + if (err) + return err; + mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, bus_info->vsd); + mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, bus_info->psid); + return 0; +} + +static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + u16 num_pages) +{ + struct mlxsw_pci_mem_item *mem_item; + int nent = 0; + int i; + int err; + + mlxsw_pci->fw_area.items = kcalloc(num_pages, sizeof(*mem_item), + GFP_KERNEL); + if (!mlxsw_pci->fw_area.items) + return -ENOMEM; + mlxsw_pci->fw_area.count = num_pages; + + mlxsw_cmd_mbox_zero(mbox); + for (i = 0; i < num_pages; i++) { + mem_item = &mlxsw_pci->fw_area.items[i]; + + mem_item->size = MLXSW_PCI_PAGE_SIZE; + mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev, + mem_item->size, + &mem_item->mapaddr); + if (!mem_item->buf) { + err = -ENOMEM; + goto err_alloc; + } + mlxsw_cmd_mbox_map_fa_pa_set(mbox, nent, mem_item->mapaddr); + mlxsw_cmd_mbox_map_fa_log2size_set(mbox, nent, 0); /* 1 page */ + if (++nent == MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX) { + err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent); + if (err) + goto err_cmd_map_fa; + nent = 0; + mlxsw_cmd_mbox_zero(mbox); + } + } + + if (nent) { + err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent); + if (err) + goto err_cmd_map_fa; + } + + return 0; + +err_cmd_map_fa: +err_alloc: + for (i--; i >= 0; i--) { + mem_item = &mlxsw_pci->fw_area.items[i]; + + pci_free_consistent(mlxsw_pci->pdev, mem_item->size, + mem_item->buf, mem_item->mapaddr); + } + kfree(mlxsw_pci->fw_area.items); + return err; +} + +static void mlxsw_pci_fw_area_fini(struct mlxsw_pci *mlxsw_pci) +{ + struct mlxsw_pci_mem_item *mem_item; + int i; + + mlxsw_cmd_unmap_fa(mlxsw_pci->core); + + for (i = 0; i < mlxsw_pci->fw_area.count; i++) { + mem_item = &mlxsw_pci->fw_area.items[i]; + + pci_free_consistent(mlxsw_pci->pdev, mem_item->size, + mem_item->buf, mem_item->mapaddr); + } + kfree(mlxsw_pci->fw_area.items); +} + +static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id) +{ + struct mlxsw_pci *mlxsw_pci = dev_id; + struct mlxsw_pci_queue *q; + int i; + + for (i = 0; i < MLXSW_PCI_EQS_COUNT; i++) { + q = mlxsw_pci_eq_get(mlxsw_pci, i); + mlxsw_pci_queue_tasklet_schedule(q); + } + return IRQ_HANDLED; +} + +static int mlxsw_pci_mbox_alloc(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_mem_item *mbox) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + int err = 0; + + mbox->size = MLXSW_CMD_MBOX_SIZE; + mbox->buf = pci_alloc_consistent(pdev, MLXSW_CMD_MBOX_SIZE, + &mbox->mapaddr); + if (!mbox->buf) { + dev_err(&pdev->dev, "Failed allocating memory for mailbox\n"); + err = -ENOMEM; + } + + return err; +} + +static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_mem_item *mbox) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + + pci_free_consistent(pdev, MLXSW_CMD_MBOX_SIZE, mbox->buf, + mbox->mapaddr); +} + +static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_config_profile *profile) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct pci_dev *pdev = mlxsw_pci->pdev; + char *mbox; + u16 num_pages; + int err; + + mutex_init(&mlxsw_pci->cmd.lock); + init_waitqueue_head(&mlxsw_pci->cmd.wait); + + mlxsw_pci->core = mlxsw_core; + + mbox = mlxsw_cmd_mbox_alloc(); + if (!mbox) + return -ENOMEM; + + err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); + if (err) + goto mbox_put; + + err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); + if (err) + goto err_out_mbox_alloc; + + err = mlxsw_cmd_query_fw(mlxsw_core, mbox); + if (err) + goto err_query_fw; + + mlxsw_pci->bus_info.fw_rev.major = + mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox); + mlxsw_pci->bus_info.fw_rev.minor = + mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox); + mlxsw_pci->bus_info.fw_rev.subminor = + mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox); + + if (mlxsw_cmd_mbox_query_fw_cmd_interface_rev_get(mbox) != 1) { + dev_err(&pdev->dev, "Unsupported cmd interface revision ID queried from hw\n"); + err = -EINVAL; + goto err_iface_rev; + } + if (mlxsw_cmd_mbox_query_fw_doorbell_page_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported doorbell page bar queried from hw\n"); + err = -EINVAL; + goto err_doorbell_page_bar; + } + + mlxsw_pci->doorbell_offset = + mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox); + + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); + err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); + if (err) + goto err_fw_area_init; + + err = mlxsw_pci_boardinfo(mlxsw_pci, mbox); + if (err) + goto err_boardinfo; + + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); + if (err) + goto err_config_profile; + + err = mlxsw_pci_aqs_init(mlxsw_pci, mbox); + if (err) + goto err_aqs_init; + + err = request_irq(mlxsw_pci->msix_entry.vector, + mlxsw_pci_eq_irq_handler, 0, + mlxsw_pci_driver_name, mlxsw_pci); + if (err) { + dev_err(&pdev->dev, "IRQ request failed\n"); + goto err_request_eq_irq; + } + + goto mbox_put; + +err_request_eq_irq: + mlxsw_pci_aqs_fini(mlxsw_pci); +err_aqs_init: +err_config_profile: +err_boardinfo: + mlxsw_pci_fw_area_fini(mlxsw_pci); +err_fw_area_init: +err_doorbell_page_bar: +err_iface_rev: +err_query_fw: + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); +err_out_mbox_alloc: + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); +mbox_put: + mlxsw_cmd_mbox_free(mbox); + return err; +} + +static void mlxsw_pci_fini(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + free_irq(mlxsw_pci->msix_entry.vector, mlxsw_pci); + mlxsw_pci_aqs_fini(mlxsw_pci); + mlxsw_pci_fw_area_fini(mlxsw_pci); + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); + mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); +} + +static struct mlxsw_pci_queue * +mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_tx_info *tx_info) +{ + u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci); + + return mlxsw_pci_sdq_get(mlxsw_pci, sdqn); +} + +static bool mlxsw_pci_skb_transmit_busy(void *bus_priv, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct mlxsw_pci_queue *q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info); + + return !mlxsw_pci_queue_elem_info_producer_get(q); +} + +static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + struct mlxsw_pci_queue *q; + struct mlxsw_pci_queue_elem_info *elem_info; + char *wqe; + int i; + int err; + + if (skb_shinfo(skb)->nr_frags > MLXSW_PCI_WQE_SG_ENTRIES - 1) { + err = skb_linearize(skb); + if (err) + return err; + } + + q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info); + spin_lock_bh(&q->lock); + elem_info = mlxsw_pci_queue_elem_info_producer_get(q); + if (!elem_info) { + /* queue is full */ + err = -EAGAIN; + goto unlock; + } + elem_info->u.sdq.skb = skb; + + wqe = elem_info->elem; + mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */ + mlxsw_pci_wqe_lp_set(wqe, !!tx_info->is_emad); + mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET); + + err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (err) + goto unlock; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, i + 1, + skb_frag_address(frag), + skb_frag_size(frag), + DMA_TO_DEVICE); + if (err) + goto unmap_frags; + } + + /* Set unused sq entries byte count to zero. */ + for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) + mlxsw_pci_wqe_byte_count_set(wqe, i, 0); + + /* Everything is set up, ring producer doorbell to get HW going */ + q->producer_counter++; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); + + goto unlock; + +unmap_frags: + for (; i >= 0; i--) + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); +unlock: + spin_unlock_bh(&q->lock); + return err; +} + +static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, + u32 in_mod, bool out_mbox_direct, + char *in_mbox, size_t in_mbox_size, + char *out_mbox, size_t out_mbox_size, + u8 *p_status) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + dma_addr_t in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr; + dma_addr_t out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr; + bool evreq = mlxsw_pci->cmd.nopoll; + unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS); + bool *p_wait_done = &mlxsw_pci->cmd.wait_done; + int err; + + *p_status = MLXSW_CMD_STATUS_OK; + + err = mutex_lock_interruptible(&mlxsw_pci->cmd.lock); + if (err) + return err; + + if (in_mbox) + memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size); + mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr)); + mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr)); + + mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr)); + mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr)); + + mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod); + mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0); + + *p_wait_done = false; + + wmb(); /* all needs to be written before we write control register */ + mlxsw_pci_write32(mlxsw_pci, CIR_CTRL, + MLXSW_PCI_CIR_CTRL_GO_BIT | + (evreq ? MLXSW_PCI_CIR_CTRL_EVREQ_BIT : 0) | + (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) | + opcode); + + if (!evreq) { + unsigned long end; + + end = jiffies + timeout; + do { + u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL); + + if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) { + *p_wait_done = true; + *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT; + break; + } + cond_resched(); + } while (time_before(jiffies, end)); + } else { + wait_event_timeout(mlxsw_pci->cmd.wait, *p_wait_done, timeout); + *p_status = mlxsw_pci->cmd.comp.status; + } + + err = 0; + if (*p_wait_done) { + if (*p_status) + err = -EIO; + } else { + err = -ETIMEDOUT; + } + + if (!err && out_mbox && out_mbox_direct) { + /* Some commands don't use output param as address to mailbox + * but they store output directly into registers. In that case, + * copy registers into mbox buffer. + */ + __be32 tmp; + + if (!evreq) { + tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, + CIR_OUT_PARAM_HI)); + memcpy(out_mbox, &tmp, sizeof(tmp)); + tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, + CIR_OUT_PARAM_LO)); + memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp)); + } + } else if (!err && out_mbox) { + memcpy(out_mbox, mlxsw_pci->cmd.out_mbox.buf, out_mbox_size); + } + + mutex_unlock(&mlxsw_pci->cmd.lock); + + return err; +} + +static const struct mlxsw_bus mlxsw_pci_bus = { + .kind = "pci", + .init = mlxsw_pci_init, + .fini = mlxsw_pci_fini, + .skb_transmit_busy = mlxsw_pci_skb_transmit_busy, + .skb_transmit = mlxsw_pci_skb_transmit, + .cmd_exec = mlxsw_pci_cmd_exec, +}; + +static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci) +{ + mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT); + /* Current firware does not let us know when the reset is done. + * So we just wait here for constant time and hope for the best. + */ + msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); + return 0; +} + +static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mlxsw_pci *mlxsw_pci; + int err; + + mlxsw_pci = kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL); + if (!mlxsw_pci) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device failed\n"); + goto err_pci_enable_device; + } + + err = pci_request_regions(pdev, mlxsw_pci_driver_name); + if (err) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + goto err_pci_request_regions; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (!err) { + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n"); + goto err_pci_set_dma_mask; + } + } else { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "pci_set_dma_mask failed\n"); + goto err_pci_set_dma_mask; + } + } + + if (pci_resource_len(pdev, 0) < MLXSW_PCI_BAR0_SIZE) { + dev_err(&pdev->dev, "invalid PCI region size\n"); + err = -EINVAL; + goto err_pci_resource_len_check; + } + + mlxsw_pci->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!mlxsw_pci->hw_addr) { + dev_err(&pdev->dev, "ioremap failed\n"); + err = -EIO; + goto err_ioremap; + } + pci_set_master(pdev); + + mlxsw_pci->pdev = pdev; + pci_set_drvdata(pdev, mlxsw_pci); + + err = mlxsw_pci_sw_reset(mlxsw_pci); + if (err) { + dev_err(&pdev->dev, "Software reset failed\n"); + goto err_sw_reset; + } + + err = pci_enable_msix_exact(pdev, &mlxsw_pci->msix_entry, 1); + if (err) { + dev_err(&pdev->dev, "MSI-X init failed\n"); + goto err_msix_init; + } + + mlxsw_pci->bus_info.device_kind = mlxsw_pci_device_kind_get(id); + mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); + mlxsw_pci->bus_info.dev = &pdev->dev; + + mlxsw_pci->dbg_dir = debugfs_create_dir(mlxsw_pci->bus_info.device_name, + mlxsw_pci_dbg_root); + if (!mlxsw_pci->dbg_dir) { + dev_err(&pdev->dev, "Failed to create debugfs dir\n"); + err = -ENOMEM; + goto err_dbg_create_dir; + } + + err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, + &mlxsw_pci_bus, mlxsw_pci); + if (err) { + dev_err(&pdev->dev, "cannot register bus device\n"); + goto err_bus_device_register; + } + + return 0; + +err_bus_device_register: + debugfs_remove_recursive(mlxsw_pci->dbg_dir); +err_dbg_create_dir: + pci_disable_msix(mlxsw_pci->pdev); +err_msix_init: +err_sw_reset: + iounmap(mlxsw_pci->hw_addr); +err_ioremap: +err_pci_resource_len_check: +err_pci_set_dma_mask: + pci_release_regions(pdev); +err_pci_request_regions: + pci_disable_device(pdev); +err_pci_enable_device: + kfree(mlxsw_pci); + return err; +} + +static void mlxsw_pci_remove(struct pci_dev *pdev) +{ + struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); + + mlxsw_core_bus_device_unregister(mlxsw_pci->core); + debugfs_remove_recursive(mlxsw_pci->dbg_dir); + pci_disable_msix(mlxsw_pci->pdev); + iounmap(mlxsw_pci->hw_addr); + pci_release_regions(mlxsw_pci->pdev); + pci_disable_device(mlxsw_pci->pdev); + kfree(mlxsw_pci); +} + +static struct pci_driver mlxsw_pci_driver = { + .name = mlxsw_pci_driver_name, + .id_table = mlxsw_pci_id_table, + .probe = mlxsw_pci_probe, + .remove = mlxsw_pci_remove, +}; + +static int __init mlxsw_pci_module_init(void) +{ + int err; + + mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL); + if (!mlxsw_pci_dbg_root) + return -ENOMEM; + err = pci_register_driver(&mlxsw_pci_driver); + if (err) + goto err_register_driver; + return 0; + +err_register_driver: + debugfs_remove_recursive(mlxsw_pci_dbg_root); + return err; +} + +static void __exit mlxsw_pci_module_exit(void) +{ + pci_unregister_driver(&mlxsw_pci_driver); + debugfs_remove_recursive(mlxsw_pci_dbg_root); +} + +module_init(mlxsw_pci_module_init); +module_exit(mlxsw_pci_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox switch PCI interface driver"); +MODULE_DEVICE_TABLE(pci, mlxsw_pci_id_table); diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/pci.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/pci.h new file mode 100644 index 000000000..142f33d97 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -0,0 +1,226 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/pci.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_PCI_H +#define _MLXSW_PCI_H + +#include <linux/bitops.h> + +#include "item.h" + +#define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738 +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84 +#define MLXSW_PCI_BAR0_SIZE (1024 * 1024) /* 1MB */ +#define MLXSW_PCI_PAGE_SIZE 4096 + +#define MLXSW_PCI_CIR_BASE 0x71000 +#define MLXSW_PCI_CIR_IN_PARAM_HI MLXSW_PCI_CIR_BASE +#define MLXSW_PCI_CIR_IN_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x04) +#define MLXSW_PCI_CIR_IN_MODIFIER (MLXSW_PCI_CIR_BASE + 0x08) +#define MLXSW_PCI_CIR_OUT_PARAM_HI (MLXSW_PCI_CIR_BASE + 0x0C) +#define MLXSW_PCI_CIR_OUT_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x10) +#define MLXSW_PCI_CIR_TOKEN (MLXSW_PCI_CIR_BASE + 0x14) +#define MLXSW_PCI_CIR_CTRL (MLXSW_PCI_CIR_BASE + 0x18) +#define MLXSW_PCI_CIR_CTRL_GO_BIT BIT(23) +#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT BIT(22) +#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT 12 +#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24 +#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 + +#define MLXSW_PCI_SW_RESET 0xF0010 +#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 + +#define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000 +#define MLXSW_PCI_DOORBELL_RDQ_OFFSET 0x200 +#define MLXSW_PCI_DOORBELL_CQ_OFFSET 0x400 +#define MLXSW_PCI_DOORBELL_EQ_OFFSET 0x600 +#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET 0x800 +#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET 0xA00 + +#define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ + ((offset) + (type_offset) + (num) * 4) + +#define MLXSW_PCI_CQS_MAX 96 +#define MLXSW_PCI_EQS_COUNT 2 +#define MLXSW_PCI_EQ_ASYNC_NUM 0 +#define MLXSW_PCI_EQ_COMP_NUM 1 + +#define MLXSW_PCI_AQ_PAGES 8 +#define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES) +#define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ +#define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) +#define MLXSW_PCI_CQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE) +#define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE) +#define MLXSW_PCI_EQE_UPDATE_COUNT 0x80 + +#define MLXSW_PCI_WQE_SG_ENTRIES 3 +#define MLXSW_PCI_WQE_TYPE_ETHERNET 0xA + +/* pci_wqe_c + * If set it indicates that a completion should be reported upon + * execution of this descriptor. + */ +MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1); + +/* pci_wqe_lp + * Local Processing, set if packet should be processed by the local + * switch hardware: + * For Ethernet EMAD (Direct Route and non Direct Route) - + * must be set if packet destination is local device + * For InfiniBand CTL - must be set if packet destination is local device + * Otherwise it must be clear + * Local Process packets must not exceed the size of 2K (including payload + * and headers). + */ +MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); + +/* pci_wqe_type + * Packet type. + */ +MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); + +/* pci_wqe_byte_count + * Size of i-th scatter/gather entry, 0 if entry is unused. + */ +MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false); + +/* pci_wqe_address + * Physical address of i-th scatter/gather entry. + * Gather Entries must be 2Byte aligned. + */ +MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); + +/* pci_cqe_lag + * Packet arrives from a port which is a LAG + */ +MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); + +/* pci_cqe_system_port + * When lag=0: System port on which the packet was received + * When lag=1: + * bits [15:4] LAG ID on which the packet was received + * bits [3:0] sub_port on which the packet was received + */ +MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); + +/* pci_cqe_wqe_counter + * WQE count of the WQEs completed on the associated dqn + */ +MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16); + +/* pci_cqe_byte_count + * Byte count of received packets including additional two + * Reserved Bytes that are append to the end of the frame. + * Reserved for Send CQE. + */ +MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); + +/* pci_cqe_trap_id + * Trap ID that captured the packet. + */ +MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8); + +/* pci_cqe_crc + * Length include CRC. Indicates the length field includes + * the packet's CRC. + */ +MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1); + +/* pci_cqe_e + * CQE with Error. + */ +MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1); + +/* pci_cqe_sr + * 1 - Send Queue + * 0 - Receive Queue + */ +MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1); + +/* pci_cqe_dqn + * Descriptor Queue (DQ) Number. + */ +MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5); + +/* pci_cqe_owner + * Ownership bit. + */ +MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1); + +/* pci_eqe_event_type + * Event type. + */ +MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8); +#define MLXSW_PCI_EQE_EVENT_TYPE_COMP 0x00 +#define MLXSW_PCI_EQE_EVENT_TYPE_CMD 0x0A + +/* pci_eqe_event_sub_type + * Event type. + */ +MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8); + +/* pci_eqe_cqn + * Completion Queue that triggeret this EQE. + */ +MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7); + +/* pci_eqe_owner + * Ownership bit. + */ +MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); + +/* pci_eqe_cmd_token + * Command completion event - token + */ +MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); + +/* pci_eqe_cmd_status + * Command completion event - status + */ +MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); + +/* pci_eqe_cmd_out_param_h + * Command completion event - output parameter - higher part + */ +MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); + +/* pci_eqe_cmd_out_param_l + * Command completion event - output parameter - lower part + */ +MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/port.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/port.h new file mode 100644 index 000000000..726f5435b --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -0,0 +1,75 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/port.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MLXSW_PORT_H +#define _MLXSW_PORT_H + +#include <linux/types.h> + +#define MLXSW_PORT_MAX_MTU 10000 + +#define MLXSW_PORT_DEFAULT_VID 1 + +#define MLXSW_PORT_SWID_DISABLED_PORT 255 +#define MLXSW_PORT_SWID_ALL_SWIDS 254 +#define MLXSW_PORT_SWID_TYPE_ETH 2 + +#define MLXSW_PORT_MID 0xd000 + +#define MLXSW_PORT_MAX_PHY_PORTS 0x40 +#define MLXSW_PORT_MAX_PORTS MLXSW_PORT_MAX_PHY_PORTS + +#define MLXSW_PORT_DEVID_BITS_OFFSET 10 +#define MLXSW_PORT_PHY_BITS_OFFSET 4 +#define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) + +#define MLXSW_PORT_CPU_PORT 0x0 + +#define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) + +enum mlxsw_port_admin_status { + MLXSW_PORT_ADMIN_STATUS_UP = 1, + MLXSW_PORT_ADMIN_STATUS_DOWN = 2, + MLXSW_PORT_ADMIN_STATUS_UP_ONCE = 3, + MLXSW_PORT_ADMIN_STATUS_DISABLED = 4, +}; + +enum mlxsw_reg_pude_oper_status { + MLXSW_PORT_OPER_STATUS_UP = 1, + MLXSW_PORT_OPER_STATUS_DOWN = 2, + MLXSW_PORT_OPER_STATUS_FAILURE = 4, /* Can be set to up again. */ +}; + +#endif /* _MLXSW_PORT_H */ diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/reg.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/reg.h new file mode 100644 index 000000000..236fb5d2a --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -0,0 +1,2460 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/reg.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_REG_H +#define _MLXSW_REG_H + +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/if_vlan.h> + +#include "item.h" +#include "port.h" + +struct mlxsw_reg_info { + u16 id; + u16 len; /* In u8 */ +}; + +#define MLXSW_REG(type) (&mlxsw_reg_##type) +#define MLXSW_REG_LEN(type) MLXSW_REG(type)->len +#define MLXSW_REG_ZERO(type, payload) memset(payload, 0, MLXSW_REG(type)->len) + +/* SGCR - Switch General Configuration Register + * -------------------------------------------- + * This register is used for configuration of the switch capabilities. + */ +#define MLXSW_REG_SGCR_ID 0x2000 +#define MLXSW_REG_SGCR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_sgcr = { + .id = MLXSW_REG_SGCR_ID, + .len = MLXSW_REG_SGCR_LEN, +}; + +/* reg_sgcr_llb + * Link Local Broadcast (Default=0) + * When set, all Link Local packets (224.0.0.X) will be treated as broadcast + * packets and ignore the IGMP snooping entries. + * Access: RW + */ +MLXSW_ITEM32(reg, sgcr, llb, 0x04, 0, 1); + +static inline void mlxsw_reg_sgcr_pack(char *payload, bool llb) +{ + MLXSW_REG_ZERO(sgcr, payload); + mlxsw_reg_sgcr_llb_set(payload, !!llb); +} + +/* SPAD - Switch Physical Address Register + * --------------------------------------- + * The SPAD register configures the switch physical MAC address. + */ +#define MLXSW_REG_SPAD_ID 0x2002 +#define MLXSW_REG_SPAD_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_spad = { + .id = MLXSW_REG_SPAD_ID, + .len = MLXSW_REG_SPAD_LEN, +}; + +/* reg_spad_base_mac + * Base MAC address for the switch partitions. + * Per switch partition MAC address is equal to: + * base_mac + swid + * Access: RW + */ +MLXSW_ITEM_BUF(reg, spad, base_mac, 0x02, 6); + +/* SSPR - Switch System Port Record Register + * ----------------------------------------- + * Configures the system port to local port mapping. + */ +#define MLXSW_REG_SSPR_ID 0x2008 +#define MLXSW_REG_SSPR_LEN 0x8 + +static const struct mlxsw_reg_info mlxsw_reg_sspr = { + .id = MLXSW_REG_SSPR_ID, + .len = MLXSW_REG_SSPR_LEN, +}; + +/* reg_sspr_m + * Master - if set, then the record describes the master system port. + * This is needed in case a local port is mapped into several system ports + * (for multipathing). That number will be reported as the source system + * port when packets are forwarded to the CPU. Only one master port is allowed + * per local port. + * + * Note: Must be set for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); + +/* reg_sspr_local_port + * Local port number. + * + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, local_port, 0x00, 16, 8); + +/* reg_sspr_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * + * Access: RW + */ +MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8); + +/* reg_sspr_system_port + * Unique identifier within the stacking domain that represents all the ports + * that are available in the system (external ports). + * + * Currently, only single-ASIC configurations are supported, so we default to + * 1:1 mapping between system ports and local ports. + * Access: Index + */ +MLXSW_ITEM32(reg, sspr, system_port, 0x04, 0, 16); + +static inline void mlxsw_reg_sspr_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(sspr, payload); + mlxsw_reg_sspr_m_set(payload, 1); + mlxsw_reg_sspr_local_port_set(payload, local_port); + mlxsw_reg_sspr_sub_port_set(payload, 0); + mlxsw_reg_sspr_system_port_set(payload, local_port); +} + +/* SFDAT - Switch Filtering Database Aging Time + * -------------------------------------------- + * Controls the Switch aging time. Aging time is able to be set per Switch + * Partition. + */ +#define MLXSW_REG_SFDAT_ID 0x2009 +#define MLXSW_REG_SFDAT_LEN 0x8 + +static const struct mlxsw_reg_info mlxsw_reg_sfdat = { + .id = MLXSW_REG_SFDAT_ID, + .len = MLXSW_REG_SFDAT_LEN, +}; + +/* reg_sfdat_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfdat, swid, 0x00, 24, 8); + +/* reg_sfdat_age_time + * Aging time in seconds + * Min - 10 seconds + * Max - 1,000,000 seconds + * Default is 300 seconds. + * Access: RW + */ +MLXSW_ITEM32(reg, sfdat, age_time, 0x04, 0, 20); + +static inline void mlxsw_reg_sfdat_pack(char *payload, u32 age_time) +{ + MLXSW_REG_ZERO(sfdat, payload); + mlxsw_reg_sfdat_swid_set(payload, 0); + mlxsw_reg_sfdat_age_time_set(payload, age_time); +} + +/* SFD - Switch Filtering Database + * ------------------------------- + * The following register defines the access to the filtering database. + * The register supports querying, adding, removing and modifying the database. + * The access is optimized for bulk updates in which case more than one + * FDB record is present in the same command. + */ +#define MLXSW_REG_SFD_ID 0x200A +#define MLXSW_REG_SFD_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_SFD_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_SFD_REC_MAX_COUNT 64 +#define MLXSW_REG_SFD_LEN (MLXSW_REG_SFD_BASE_LEN + \ + MLXSW_REG_SFD_REC_LEN * MLXSW_REG_SFD_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_sfd = { + .id = MLXSW_REG_SFD_ID, + .len = MLXSW_REG_SFD_LEN, +}; + +/* reg_sfd_swid + * Switch partition ID for queries. Reserved on Write. + * Access: Index + */ +MLXSW_ITEM32(reg, sfd, swid, 0x00, 24, 8); + +enum mlxsw_reg_sfd_op { + /* Dump entire FDB a (process according to record_locator) */ + MLXSW_REG_SFD_OP_QUERY_DUMP = 0, + /* Query records by {MAC, VID/FID} value */ + MLXSW_REG_SFD_OP_QUERY_QUERY = 1, + /* Query and clear activity. Query records by {MAC, VID/FID} value */ + MLXSW_REG_SFD_OP_QUERY_QUERY_AND_CLEAR_ACTIVITY = 2, + /* Test. Response indicates if each of the records could be + * added to the FDB. + */ + MLXSW_REG_SFD_OP_WRITE_TEST = 0, + /* Add/modify. Aged-out records cannot be added. This command removes + * the learning notification of the {MAC, VID/FID}. Response includes + * the entries that were added to the FDB. + */ + MLXSW_REG_SFD_OP_WRITE_EDIT = 1, + /* Remove record by {MAC, VID/FID}. This command also removes + * the learning notification and aged-out notifications + * of the {MAC, VID/FID}. The response provides current (pre-removal) + * entries as non-aged-out. + */ + MLXSW_REG_SFD_OP_WRITE_REMOVE = 2, + /* Remove learned notification by {MAC, VID/FID}. The response provides + * the removed learning notification. + */ + MLXSW_REG_SFD_OP_WRITE_REMOVE_NOTIFICATION = 2, +}; + +/* reg_sfd_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, sfd, op, 0x04, 30, 2); + +/* reg_sfd_record_locator + * Used for querying the FDB. Use record_locator=0 to initiate the + * query. When a record is returned, a new record_locator is + * returned to be used in the subsequent query. + * Reserved for database update. + * Access: Index + */ +MLXSW_ITEM32(reg, sfd, record_locator, 0x04, 0, 30); + +/* reg_sfd_num_rec + * Request: Number of records to read/add/modify/remove + * Response: Number of records read/added/replaced/removed + * See above description for more details. + * Ranges 0..64 + * Access: RW + */ +MLXSW_ITEM32(reg, sfd, num_rec, 0x08, 0, 8); + +static inline void mlxsw_reg_sfd_pack(char *payload, enum mlxsw_reg_sfd_op op, + u32 record_locator) +{ + MLXSW_REG_ZERO(sfd, payload); + mlxsw_reg_sfd_op_set(payload, op); + mlxsw_reg_sfd_record_locator_set(payload, record_locator); +} + +/* reg_sfd_rec_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +enum mlxsw_reg_sfd_rec_type { + MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, +}; + +/* reg_sfd_rec_type + * FDB record type. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_type, MLXSW_REG_SFD_BASE_LEN, 20, 4, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +enum mlxsw_reg_sfd_rec_policy { + /* Replacement disabled, aging disabled. */ + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY = 0, + /* (mlag remote): Replacement enabled, aging disabled, + * learning notification enabled on this port. + */ + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG = 1, + /* (ingress device): Replacement enabled, aging enabled. */ + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS = 3, +}; + +/* reg_sfd_rec_policy + * Policy. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_policy, MLXSW_REG_SFD_BASE_LEN, 18, 2, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +/* reg_sfd_rec_a + * Activity. Set for new static entries. Set for static entries if a frame SMAC + * lookup hits on the entry. + * To clear the a bit, use "query and clear activity" op. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_a, MLXSW_REG_SFD_BASE_LEN, 16, 1, + MLXSW_REG_SFD_REC_LEN, 0x00, false); + +/* reg_sfd_rec_mac + * MAC address. + * Access: Index + */ +MLXSW_ITEM_BUF_INDEXED(reg, sfd, rec_mac, MLXSW_REG_SFD_BASE_LEN, 6, + MLXSW_REG_SFD_REC_LEN, 0x02); + +enum mlxsw_reg_sfd_rec_action { + /* forward */ + MLXSW_REG_SFD_REC_ACTION_NOP = 0, + /* forward and trap, trap_id is FDB_TRAP */ + MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1, + /* trap and do not forward, trap_id is FDB_TRAP */ + MLXSW_REG_SFD_REC_ACTION_TRAP = 3, + MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15, +}; + +/* reg_sfd_rec_action + * Action to apply on the packet. + * Note: Dynamic entries can only be configured with NOP action. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, rec_action, MLXSW_REG_SFD_BASE_LEN, 28, 4, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_sub_port + * VEPA channel on local port. + * Valid only if local port is a non-stacking port. Must be 0 if multichannel + * VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_system_port + * Unique port identifier for the final destination of the packet. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u8 local_port) +{ + u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); + + if (rec_index >= num_rec) + mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); + mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); + mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); + mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); +} + +static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u8 *p_local_port) +{ + mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index); + *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); +} + +/* SFN - Switch FDB Notification Register + * ------------------------------------------- + * The switch provides notifications on newly learned FDB entries and + * aged out entries. The notifications can be polled by software. + */ +#define MLXSW_REG_SFN_ID 0x200B +#define MLXSW_REG_SFN_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_SFN_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_SFN_REC_MAX_COUNT 64 +#define MLXSW_REG_SFN_LEN (MLXSW_REG_SFN_BASE_LEN + \ + MLXSW_REG_SFN_REC_LEN * MLXSW_REG_SFN_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_sfn = { + .id = MLXSW_REG_SFN_ID, + .len = MLXSW_REG_SFN_LEN, +}; + +/* reg_sfn_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8); + +/* reg_sfn_num_rec + * Request: Number of learned notifications and aged-out notification + * records requested. + * Response: Number of notification records returned (must be smaller + * than or equal to the value requested) + * Ranges 0..64 + * Access: OP + */ +MLXSW_ITEM32(reg, sfn, num_rec, 0x04, 0, 8); + +static inline void mlxsw_reg_sfn_pack(char *payload) +{ + MLXSW_REG_ZERO(sfn, payload); + mlxsw_reg_sfn_swid_set(payload, 0); + mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT); +} + +/* reg_sfn_rec_swid + * Switch partition ID. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, + MLXSW_REG_SFN_REC_LEN, 0x00, false); + +enum mlxsw_reg_sfn_rec_type { + /* MAC addresses learned on a regular port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, + /* Aged-out MAC address on a regular port */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, +}; + +/* reg_sfn_rec_type + * Notification record type. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, rec_type, MLXSW_REG_SFN_BASE_LEN, 20, 4, + MLXSW_REG_SFN_REC_LEN, 0x00, false); + +/* reg_sfn_rec_mac + * MAC address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, sfn, rec_mac, MLXSW_REG_SFN_BASE_LEN, 6, + MLXSW_REG_SFN_REC_LEN, 0x02); + +/* reg_sfn_mac_sub_port + * VEPA channel on the local port. + * 0 if multichannel VEPA is not enabled. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_sub_port, MLXSW_REG_SFN_BASE_LEN, 16, 8, + MLXSW_REG_SFN_REC_LEN, 0x08, false); + +/* reg_sfn_mac_fid + * Filtering identifier. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_fid, MLXSW_REG_SFN_BASE_LEN, 0, 16, + MLXSW_REG_SFN_REC_LEN, 0x08, false); + +/* reg_sfn_mac_system_port + * Unique port identifier for the final destination of the packet. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_system_port, MLXSW_REG_SFN_BASE_LEN, 0, 16, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u8 *p_local_port) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); +} + +/* SPMS - Switch Port MSTP/RSTP State Register + * ------------------------------------------- + * Configures the spanning tree state of a physical port. + */ +#define MLXSW_REG_SPMS_ID 0x200D +#define MLXSW_REG_SPMS_LEN 0x404 + +static const struct mlxsw_reg_info mlxsw_reg_spms = { + .id = MLXSW_REG_SPMS_ID, + .len = MLXSW_REG_SPMS_LEN, +}; + +/* reg_spms_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, spms, local_port, 0x00, 16, 8); + +enum mlxsw_reg_spms_state { + MLXSW_REG_SPMS_STATE_NO_CHANGE, + MLXSW_REG_SPMS_STATE_DISCARDING, + MLXSW_REG_SPMS_STATE_LEARNING, + MLXSW_REG_SPMS_STATE_FORWARDING, +}; + +/* reg_spms_state + * Spanning tree state of each VLAN ID (VID) of the local port. + * 0 - Do not change spanning tree state (used only when writing). + * 1 - Discarding. No learning or forwarding to/from this port (default). + * 2 - Learning. Port is learning, but not forwarding. + * 3 - Forwarding. Port is learning and forwarding. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, spms, state, 0x04, 0x400, 2); + +static inline void mlxsw_reg_spms_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(spms, payload); + mlxsw_reg_spms_local_port_set(payload, local_port); +} + +static inline void mlxsw_reg_spms_vid_pack(char *payload, u16 vid, + enum mlxsw_reg_spms_state state) +{ + mlxsw_reg_spms_state_set(payload, vid, state); +} + +/* SPVID - Switch Port VID + * ----------------------- + * The switch port VID configures the default VID for a port. + */ +#define MLXSW_REG_SPVID_ID 0x200E +#define MLXSW_REG_SPVID_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_spvid = { + .id = MLXSW_REG_SPVID_ID, + .len = MLXSW_REG_SPVID_LEN, +}; + +/* reg_spvid_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, spvid, local_port, 0x00, 16, 8); + +/* reg_spvid_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8); + +/* reg_spvid_pvid + * Port default VID + * Access: RW + */ +MLXSW_ITEM32(reg, spvid, pvid, 0x04, 0, 12); + +static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid) +{ + MLXSW_REG_ZERO(spvid, payload); + mlxsw_reg_spvid_local_port_set(payload, local_port); + mlxsw_reg_spvid_pvid_set(payload, pvid); +} + +/* SPVM - Switch Port VLAN Membership + * ---------------------------------- + * The Switch Port VLAN Membership register configures the VLAN membership + * of a port in a VLAN denoted by VID. VLAN membership is managed per + * virtual port. The register can be used to add and remove VID(s) from a port. + */ +#define MLXSW_REG_SPVM_ID 0x200F +#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */ +#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_SPVM_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ + MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_spvm = { + .id = MLXSW_REG_SPVM_ID, + .len = MLXSW_REG_SPVM_LEN, +}; + +/* reg_spvm_pt + * Priority tagged. If this bit is set, packets forwarded to the port with + * untagged VLAN membership (u bit is set) will be tagged with priority tag + * (VID=0) + * Access: RW + */ +MLXSW_ITEM32(reg, spvm, pt, 0x00, 31, 1); + +/* reg_spvm_pte + * Priority Tagged Update Enable. On Write operations, if this bit is cleared, + * the pt bit will NOT be updated. To update the pt bit, pte must be set. + * Access: WO + */ +MLXSW_ITEM32(reg, spvm, pte, 0x00, 30, 1); + +/* reg_spvm_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, spvm, local_port, 0x00, 16, 8); + +/* reg_spvm_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, spvm, sub_port, 0x00, 8, 8); + +/* reg_spvm_num_rec + * Number of records to update. Each record contains: i, e, u, vid. + * Access: OP + */ +MLXSW_ITEM32(reg, spvm, num_rec, 0x00, 0, 8); + +/* reg_spvm_rec_i + * Ingress membership in VLAN ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_i, + MLXSW_REG_SPVM_BASE_LEN, 14, 1, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +/* reg_spvm_rec_e + * Egress membership in VLAN ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_e, + MLXSW_REG_SPVM_BASE_LEN, 13, 1, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +/* reg_spvm_rec_u + * Untagged - port is an untagged member - egress transmission uses untagged + * frames on VID<n> + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_u, + MLXSW_REG_SPVM_BASE_LEN, 12, 1, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +/* reg_spvm_rec_vid + * Egress membership in VLAN ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvm, rec_vid, + MLXSW_REG_SPVM_BASE_LEN, 0, 12, + MLXSW_REG_SPVM_REC_LEN, 0, false); + +static inline void mlxsw_reg_spvm_pack(char *payload, u8 local_port, + u16 vid_begin, u16 vid_end, + bool is_member, bool untagged) +{ + int size = vid_end - vid_begin + 1; + int i; + + MLXSW_REG_ZERO(spvm, payload); + mlxsw_reg_spvm_local_port_set(payload, local_port); + mlxsw_reg_spvm_num_rec_set(payload, size); + + for (i = 0; i < size; i++) { + mlxsw_reg_spvm_rec_i_set(payload, i, is_member); + mlxsw_reg_spvm_rec_e_set(payload, i, is_member); + mlxsw_reg_spvm_rec_u_set(payload, i, untagged); + mlxsw_reg_spvm_rec_vid_set(payload, i, vid_begin + i); + } +} + +/* SFGC - Switch Flooding Group Configuration + * ------------------------------------------ + * The following register controls the association of flooding tables and MIDs + * to packet types used for flooding. + */ +#define MLXSW_REG_SFGC_ID 0x2011 +#define MLXSW_REG_SFGC_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_sfgc = { + .id = MLXSW_REG_SFGC_ID, + .len = MLXSW_REG_SFGC_LEN, +}; + +enum mlxsw_reg_sfgc_type { + MLXSW_REG_SFGC_TYPE_BROADCAST, + MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6, + MLXSW_REG_SFGC_TYPE_RESERVED, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP, + MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL, + MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST, + MLXSW_REG_SFGC_TYPE_MAX, +}; + +/* reg_sfgc_type + * The traffic type to reach the flooding table. + * Access: Index + */ +MLXSW_ITEM32(reg, sfgc, type, 0x00, 0, 4); + +enum mlxsw_reg_sfgc_bridge_type { + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID = 0, + MLXSW_REG_SFGC_BRIDGE_TYPE_VFID = 1, +}; + +/* reg_sfgc_bridge_type + * Access: Index + * + * Note: SwitchX-2 only supports 802.1Q mode. + */ +MLXSW_ITEM32(reg, sfgc, bridge_type, 0x04, 24, 3); + +enum mlxsw_flood_table_type { + MLXSW_REG_SFGC_TABLE_TYPE_VID = 1, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE = 2, + MLXSW_REG_SFGC_TABLE_TYPE_ANY = 0, + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST = 3, + MLXSW_REG_SFGC_TABLE_TYPE_FID = 4, +}; + +/* reg_sfgc_table_type + * See mlxsw_flood_table_type + * Access: RW + * + * Note: FID offset and FID types are not supported in SwitchX-2. + */ +MLXSW_ITEM32(reg, sfgc, table_type, 0x04, 16, 3); + +/* reg_sfgc_flood_table + * Flooding table index to associate with the specific type on the specific + * switch partition. + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, flood_table, 0x04, 0, 6); + +/* reg_sfgc_mid + * The multicast ID for the swid. Not supported for Spectrum + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, mid, 0x08, 0, 16); + +/* reg_sfgc_counter_set_type + * Counter Set Type for flow counters. + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, counter_set_type, 0x0C, 24, 8); + +/* reg_sfgc_counter_index + * Counter Index for flow counters. + * Access: RW + */ +MLXSW_ITEM32(reg, sfgc, counter_index, 0x0C, 0, 24); + +static inline void +mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type, + enum mlxsw_reg_sfgc_bridge_type bridge_type, + enum mlxsw_flood_table_type table_type, + unsigned int flood_table) +{ + MLXSW_REG_ZERO(sfgc, payload); + mlxsw_reg_sfgc_type_set(payload, type); + mlxsw_reg_sfgc_bridge_type_set(payload, bridge_type); + mlxsw_reg_sfgc_table_type_set(payload, table_type); + mlxsw_reg_sfgc_flood_table_set(payload, flood_table); + mlxsw_reg_sfgc_mid_set(payload, MLXSW_PORT_MID); +} + +/* SFTR - Switch Flooding Table Register + * ------------------------------------- + * The switch flooding table is used for flooding packet replication. The table + * defines a bit mask of ports for packet replication. + */ +#define MLXSW_REG_SFTR_ID 0x2012 +#define MLXSW_REG_SFTR_LEN 0x420 + +static const struct mlxsw_reg_info mlxsw_reg_sftr = { + .id = MLXSW_REG_SFTR_ID, + .len = MLXSW_REG_SFTR_LEN, +}; + +/* reg_sftr_swid + * Switch partition ID with which to associate the port. + * Access: Index + */ +MLXSW_ITEM32(reg, sftr, swid, 0x00, 24, 8); + +/* reg_sftr_flood_table + * Flooding table index to associate with the specific type on the specific + * switch partition. + * Access: Index + */ +MLXSW_ITEM32(reg, sftr, flood_table, 0x00, 16, 6); + +/* reg_sftr_index + * Index. Used as an index into the Flooding Table in case the table is + * configured to use VID / FID or FID Offset. + * Access: Index + */ +MLXSW_ITEM32(reg, sftr, index, 0x00, 0, 16); + +/* reg_sftr_table_type + * See mlxsw_flood_table_type + * Access: RW + */ +MLXSW_ITEM32(reg, sftr, table_type, 0x04, 16, 3); + +/* reg_sftr_range + * Range of entries to update + * Access: Index + */ +MLXSW_ITEM32(reg, sftr, range, 0x04, 0, 16); + +/* reg_sftr_port + * Local port membership (1 bit per port). + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, sftr, port, 0x20, 0x20, 1); + +/* reg_sftr_cpu_port_mask + * CPU port mask (1 bit per port). + * Access: W + */ +MLXSW_ITEM_BIT_ARRAY(reg, sftr, port_mask, 0x220, 0x20, 1); + +static inline void mlxsw_reg_sftr_pack(char *payload, + unsigned int flood_table, + unsigned int index, + enum mlxsw_flood_table_type table_type, + unsigned int range, u8 port, bool set) +{ + MLXSW_REG_ZERO(sftr, payload); + mlxsw_reg_sftr_swid_set(payload, 0); + mlxsw_reg_sftr_flood_table_set(payload, flood_table); + mlxsw_reg_sftr_index_set(payload, index); + mlxsw_reg_sftr_table_type_set(payload, table_type); + mlxsw_reg_sftr_range_set(payload, range); + mlxsw_reg_sftr_port_set(payload, port, set); + mlxsw_reg_sftr_port_mask_set(payload, port, 1); +} + +/* SPMLR - Switch Port MAC Learning Register + * ----------------------------------------- + * Controls the Switch MAC learning policy per port. + */ +#define MLXSW_REG_SPMLR_ID 0x2018 +#define MLXSW_REG_SPMLR_LEN 0x8 + +static const struct mlxsw_reg_info mlxsw_reg_spmlr = { + .id = MLXSW_REG_SPMLR_ID, + .len = MLXSW_REG_SPMLR_LEN, +}; + +/* reg_spmlr_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, spmlr, local_port, 0x00, 16, 8); + +/* reg_spmlr_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, spmlr, sub_port, 0x00, 8, 8); + +enum mlxsw_reg_spmlr_learn_mode { + MLXSW_REG_SPMLR_LEARN_MODE_DISABLE = 0, + MLXSW_REG_SPMLR_LEARN_MODE_ENABLE = 2, + MLXSW_REG_SPMLR_LEARN_MODE_SEC = 3, +}; + +/* reg_spmlr_learn_mode + * Learning mode on the port. + * 0 - Learning disabled. + * 2 - Learning enabled. + * 3 - Security mode. + * + * In security mode the switch does not learn MACs on the port, but uses the + * SMAC to see if it exists on another ingress port. If so, the packet is + * classified as a bad packet and is discarded unless the software registers + * to receive port security error packets usign HPKT. + */ +MLXSW_ITEM32(reg, spmlr, learn_mode, 0x04, 30, 2); + +static inline void mlxsw_reg_spmlr_pack(char *payload, u8 local_port, + enum mlxsw_reg_spmlr_learn_mode mode) +{ + MLXSW_REG_ZERO(spmlr, payload); + mlxsw_reg_spmlr_local_port_set(payload, local_port); + mlxsw_reg_spmlr_sub_port_set(payload, 0); + mlxsw_reg_spmlr_learn_mode_set(payload, mode); +} + +/* SVFA - Switch VID to FID Allocation Register + * -------------------------------------------- + * Controls the VID to FID mapping and {Port, VID} to FID mapping for + * virtualized ports. + */ +#define MLXSW_REG_SVFA_ID 0x201C +#define MLXSW_REG_SVFA_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_svfa = { + .id = MLXSW_REG_SVFA_ID, + .len = MLXSW_REG_SVFA_LEN, +}; + +/* reg_svfa_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, svfa, swid, 0x00, 24, 8); + +/* reg_svfa_local_port + * Local port number. + * Access: Index + * + * Note: Reserved for 802.1Q FIDs. + */ +MLXSW_ITEM32(reg, svfa, local_port, 0x00, 16, 8); + +enum mlxsw_reg_svfa_mt { + MLXSW_REG_SVFA_MT_VID_TO_FID, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, +}; + +/* reg_svfa_mapping_table + * Mapping table: + * 0 - VID to FID + * 1 - {Port, VID} to FID + * Access: Index + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, mapping_table, 0x00, 8, 3); + +/* reg_svfa_v + * Valid. + * Valid if set. + * Access: RW + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, v, 0x00, 0, 1); + +/* reg_svfa_fid + * Filtering ID. + * Access: RW + */ +MLXSW_ITEM32(reg, svfa, fid, 0x04, 16, 16); + +/* reg_svfa_vid + * VLAN ID. + * Access: Index + */ +MLXSW_ITEM32(reg, svfa, vid, 0x04, 0, 12); + +/* reg_svfa_counter_set_type + * Counter set type for flow counters. + * Access: RW + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, counter_set_type, 0x08, 24, 8); + +/* reg_svfa_counter_index + * Counter index for flow counters. + * Access: RW + * + * Note: Reserved for SwitchX-2. + */ +MLXSW_ITEM32(reg, svfa, counter_index, 0x08, 0, 24); + +static inline void mlxsw_reg_svfa_pack(char *payload, u8 local_port, + enum mlxsw_reg_svfa_mt mt, bool valid, + u16 fid, u16 vid) +{ + MLXSW_REG_ZERO(svfa, payload); + local_port = mt == MLXSW_REG_SVFA_MT_VID_TO_FID ? 0 : local_port; + mlxsw_reg_svfa_swid_set(payload, 0); + mlxsw_reg_svfa_local_port_set(payload, local_port); + mlxsw_reg_svfa_mapping_table_set(payload, mt); + mlxsw_reg_svfa_v_set(payload, valid); + mlxsw_reg_svfa_fid_set(payload, fid); + mlxsw_reg_svfa_vid_set(payload, vid); +} + +/* SVPE - Switch Virtual-Port Enabling Register + * -------------------------------------------- + * Enables port virtualization. + */ +#define MLXSW_REG_SVPE_ID 0x201E +#define MLXSW_REG_SVPE_LEN 0x4 + +static const struct mlxsw_reg_info mlxsw_reg_svpe = { + .id = MLXSW_REG_SVPE_ID, + .len = MLXSW_REG_SVPE_LEN, +}; + +/* reg_svpe_local_port + * Local port number + * Access: Index + * + * Note: CPU port is not supported (uses VLAN mode only). + */ +MLXSW_ITEM32(reg, svpe, local_port, 0x00, 16, 8); + +/* reg_svpe_vp_en + * Virtual port enable. + * 0 - Disable, VLAN mode (VID to FID). + * 1 - Enable, Virtual port mode ({Port, VID} to FID). + * Access: RW + */ +MLXSW_ITEM32(reg, svpe, vp_en, 0x00, 8, 1); + +static inline void mlxsw_reg_svpe_pack(char *payload, u8 local_port, + bool enable) +{ + MLXSW_REG_ZERO(svpe, payload); + mlxsw_reg_svpe_local_port_set(payload, local_port); + mlxsw_reg_svpe_vp_en_set(payload, enable); +} + +/* SFMR - Switch FID Management Register + * ------------------------------------- + * Creates and configures FIDs. + */ +#define MLXSW_REG_SFMR_ID 0x201F +#define MLXSW_REG_SFMR_LEN 0x18 + +static const struct mlxsw_reg_info mlxsw_reg_sfmr = { + .id = MLXSW_REG_SFMR_ID, + .len = MLXSW_REG_SFMR_LEN, +}; + +enum mlxsw_reg_sfmr_op { + MLXSW_REG_SFMR_OP_CREATE_FID, + MLXSW_REG_SFMR_OP_DESTROY_FID, +}; + +/* reg_sfmr_op + * Operation. + * 0 - Create or edit FID. + * 1 - Destroy FID. + * Access: WO + */ +MLXSW_ITEM32(reg, sfmr, op, 0x00, 24, 4); + +/* reg_sfmr_fid + * Filtering ID. + * Access: Index + */ +MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16); + +/* reg_sfmr_fid_offset + * FID offset. + * Used to point into the flooding table selected by SFGC register if + * the table is of type FID-Offset. Otherwise, this field is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, sfmr, fid_offset, 0x08, 0, 16); + +/* reg_sfmr_vtfp + * Valid Tunnel Flood Pointer. + * If not set, then nve_tunnel_flood_ptr is reserved and considered NULL. + * Access: RW + * + * Note: Reserved for 802.1Q FIDs. + */ +MLXSW_ITEM32(reg, sfmr, vtfp, 0x0C, 31, 1); + +/* reg_sfmr_nve_tunnel_flood_ptr + * Underlay Flooding and BC Pointer. + * Used as a pointer to the first entry of the group based link lists of + * flooding or BC entries (for NVE tunnels). + * Access: RW + */ +MLXSW_ITEM32(reg, sfmr, nve_tunnel_flood_ptr, 0x0C, 0, 24); + +/* reg_sfmr_vv + * VNI Valid. + * If not set, then vni is reserved. + * Access: RW + * + * Note: Reserved for 802.1Q FIDs. + */ +MLXSW_ITEM32(reg, sfmr, vv, 0x10, 31, 1); + +/* reg_sfmr_vni + * Virtual Network Identifier. + * Access: RW + * + * Note: A given VNI can only be assigned to one FID. + */ +MLXSW_ITEM32(reg, sfmr, vni, 0x10, 0, 24); + +static inline void mlxsw_reg_sfmr_pack(char *payload, + enum mlxsw_reg_sfmr_op op, u16 fid, + u16 fid_offset) +{ + MLXSW_REG_ZERO(sfmr, payload); + mlxsw_reg_sfmr_op_set(payload, op); + mlxsw_reg_sfmr_fid_set(payload, fid); + mlxsw_reg_sfmr_fid_offset_set(payload, fid_offset); + mlxsw_reg_sfmr_vtfp_set(payload, false); + mlxsw_reg_sfmr_vv_set(payload, false); +} + +/* SPVMLR - Switch Port VLAN MAC Learning Register + * ----------------------------------------------- + * Controls the switch MAC learning policy per {Port, VID}. + */ +#define MLXSW_REG_SPVMLR_ID 0x2020 +#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */ +#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \ + MLXSW_REG_SPVMLR_REC_LEN * \ + MLXSW_REG_SPVMLR_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_spvmlr = { + .id = MLXSW_REG_SPVMLR_ID, + .len = MLXSW_REG_SPVMLR_LEN, +}; + +/* reg_spvmlr_local_port + * Local ingress port. + * Access: Index + * + * Note: CPU port is not supported. + */ +MLXSW_ITEM32(reg, spvmlr, local_port, 0x00, 16, 8); + +/* reg_spvmlr_num_rec + * Number of records to update. + * Access: OP + */ +MLXSW_ITEM32(reg, spvmlr, num_rec, 0x00, 0, 8); + +/* reg_spvmlr_rec_learn_enable + * 0 - Disable learning for {Port, VID}. + * 1 - Enable learning for {Port, VID}. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, spvmlr, rec_learn_enable, MLXSW_REG_SPVMLR_BASE_LEN, + 31, 1, MLXSW_REG_SPVMLR_REC_LEN, 0x00, false); + +/* reg_spvmlr_rec_vid + * VLAN ID to be added/removed from port or for querying. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, spvmlr, rec_vid, MLXSW_REG_SPVMLR_BASE_LEN, 0, 12, + MLXSW_REG_SPVMLR_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, + u16 vid_begin, u16 vid_end, + bool learn_enable) +{ + int num_rec = vid_end - vid_begin + 1; + int i; + + WARN_ON(num_rec < 1 || num_rec > MLXSW_REG_SPVMLR_REC_MAX_COUNT); + + MLXSW_REG_ZERO(spvmlr, payload); + mlxsw_reg_spvmlr_local_port_set(payload, local_port); + mlxsw_reg_spvmlr_num_rec_set(payload, num_rec); + + for (i = 0; i < num_rec; i++) { + mlxsw_reg_spvmlr_rec_learn_enable_set(payload, i, learn_enable); + mlxsw_reg_spvmlr_rec_vid_set(payload, i, vid_begin + i); + } +} + +/* PMLP - Ports Module to Local Port Register + * ------------------------------------------ + * Configures the assignment of modules to local ports. + */ +#define MLXSW_REG_PMLP_ID 0x5002 +#define MLXSW_REG_PMLP_LEN 0x40 + +static const struct mlxsw_reg_info mlxsw_reg_pmlp = { + .id = MLXSW_REG_PMLP_ID, + .len = MLXSW_REG_PMLP_LEN, +}; + +/* reg_pmlp_rxtx + * 0 - Tx value is used for both Tx and Rx. + * 1 - Rx value is taken from a separte field. + * Access: RW + */ +MLXSW_ITEM32(reg, pmlp, rxtx, 0x00, 31, 1); + +/* reg_pmlp_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmlp, local_port, 0x00, 16, 8); + +/* reg_pmlp_width + * 0 - Unmap local port. + * 1 - Lane 0 is used. + * 2 - Lanes 0 and 1 are used. + * 4 - Lanes 0, 1, 2 and 3 are used. + * Access: RW + */ +MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8); + +/* reg_pmlp_module + * Module number. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0, false); + +/* reg_pmlp_tx_lane + * Tx Lane. When rxtx field is cleared, this field is used for Rx as well. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 16, false); + +/* reg_pmlp_rx_lane + * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is + * equal to Tx lane. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 24, false); + +static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pmlp, payload); + mlxsw_reg_pmlp_local_port_set(payload, local_port); +} + +/* PMTU - Port MTU Register + * ------------------------ + * Configures and reports the port MTU. + */ +#define MLXSW_REG_PMTU_ID 0x5003 +#define MLXSW_REG_PMTU_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_pmtu = { + .id = MLXSW_REG_PMTU_ID, + .len = MLXSW_REG_PMTU_LEN, +}; + +/* reg_pmtu_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtu, local_port, 0x00, 16, 8); + +/* reg_pmtu_max_mtu + * Maximum MTU. + * When port type (e.g. Ethernet) is configured, the relevant MTU is + * reported, otherwise the minimum between the max_mtu of the different + * types is reported. + * Access: RO + */ +MLXSW_ITEM32(reg, pmtu, max_mtu, 0x04, 16, 16); + +/* reg_pmtu_admin_mtu + * MTU value to set port to. Must be smaller or equal to max_mtu. + * Note: If port type is Infiniband, then port must be disabled, when its + * MTU is set. + * Access: RW + */ +MLXSW_ITEM32(reg, pmtu, admin_mtu, 0x08, 16, 16); + +/* reg_pmtu_oper_mtu + * The actual MTU configured on the port. Packets exceeding this size + * will be dropped. + * Note: In Ethernet and FC oper_mtu == admin_mtu, however, in Infiniband + * oper_mtu might be smaller than admin_mtu. + * Access: RO + */ +MLXSW_ITEM32(reg, pmtu, oper_mtu, 0x0C, 16, 16); + +static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port, + u16 new_mtu) +{ + MLXSW_REG_ZERO(pmtu, payload); + mlxsw_reg_pmtu_local_port_set(payload, local_port); + mlxsw_reg_pmtu_max_mtu_set(payload, 0); + mlxsw_reg_pmtu_admin_mtu_set(payload, new_mtu); + mlxsw_reg_pmtu_oper_mtu_set(payload, 0); +} + +/* PTYS - Port Type and Speed Register + * ----------------------------------- + * Configures and reports the port speed type. + * + * Note: When set while the link is up, the changes will not take effect + * until the port transitions from down to up state. + */ +#define MLXSW_REG_PTYS_ID 0x5004 +#define MLXSW_REG_PTYS_LEN 0x40 + +static const struct mlxsw_reg_info mlxsw_reg_ptys = { + .id = MLXSW_REG_PTYS_ID, + .len = MLXSW_REG_PTYS_LEN, +}; + +/* reg_ptys_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8); + +#define MLXSW_REG_PTYS_PROTO_MASK_ETH BIT(2) + +/* reg_ptys_proto_mask + * Protocol mask. Indicates which protocol is used. + * 0 - Infiniband. + * 1 - Fibre Channel. + * 2 - Ethernet. + * Access: Index + */ +MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3); + +#define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0) +#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 BIT(3) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR BIT(4) +#define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7) +#define MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4 BIT(8) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR BIT(13) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 BIT(15) +#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4 BIT(16) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 BIT(19) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) +#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX BIT(24) +#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(25) +#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T BIT(26) +#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) +#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) +#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 BIT(30) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2 BIT(31) + +/* reg_ptys_eth_proto_cap + * Ethernet port supported speeds and protocols. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32); + +/* reg_ptys_eth_proto_admin + * Speed and protocol to set port to. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32); + +/* reg_ptys_eth_proto_oper + * The current speed and protocol configured for the port. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); + +static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port, + u32 proto_admin) +{ + MLXSW_REG_ZERO(ptys, payload); + mlxsw_reg_ptys_local_port_set(payload, local_port); + mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH); + mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin); +} + +static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap, + u32 *p_eth_proto_adm, + u32 *p_eth_proto_oper) +{ + if (p_eth_proto_cap) + *p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload); + if (p_eth_proto_adm) + *p_eth_proto_adm = mlxsw_reg_ptys_eth_proto_admin_get(payload); + if (p_eth_proto_oper) + *p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload); +} + +/* PPAD - Port Physical Address Register + * ------------------------------------- + * The PPAD register configures the per port physical MAC address. + */ +#define MLXSW_REG_PPAD_ID 0x5005 +#define MLXSW_REG_PPAD_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_ppad = { + .id = MLXSW_REG_PPAD_ID, + .len = MLXSW_REG_PPAD_LEN, +}; + +/* reg_ppad_single_base_mac + * 0: base_mac, local port should be 0 and mac[7:0] is + * reserved. HW will set incremental + * 1: single_mac - mac of the local_port + * Access: RW + */ +MLXSW_ITEM32(reg, ppad, single_base_mac, 0x00, 28, 1); + +/* reg_ppad_local_port + * port number, if single_base_mac = 0 then local_port is reserved + * Access: RW + */ +MLXSW_ITEM32(reg, ppad, local_port, 0x00, 16, 8); + +/* reg_ppad_mac + * If single_base_mac = 0 - base MAC address, mac[7:0] is reserved. + * If single_base_mac = 1 - the per port MAC address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ppad, mac, 0x02, 6); + +static inline void mlxsw_reg_ppad_pack(char *payload, bool single_base_mac, + u8 local_port) +{ + MLXSW_REG_ZERO(ppad, payload); + mlxsw_reg_ppad_single_base_mac_set(payload, !!single_base_mac); + mlxsw_reg_ppad_local_port_set(payload, local_port); +} + +/* PAOS - Ports Administrative and Operational Status Register + * ----------------------------------------------------------- + * Configures and retrieves per port administrative and operational status. + */ +#define MLXSW_REG_PAOS_ID 0x5006 +#define MLXSW_REG_PAOS_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_paos = { + .id = MLXSW_REG_PAOS_ID, + .len = MLXSW_REG_PAOS_LEN, +}; + +/* reg_paos_swid + * Switch partition ID with which to associate the port. + * Note: while external ports uses unique local port numbers (and thus swid is + * redundant), router ports use the same local port number where swid is the + * only indication for the relevant port. + * Access: Index + */ +MLXSW_ITEM32(reg, paos, swid, 0x00, 24, 8); + +/* reg_paos_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, paos, local_port, 0x00, 16, 8); + +/* reg_paos_admin_status + * Port administrative state (the desired state of the port): + * 1 - Up. + * 2 - Down. + * 3 - Up once. This means that in case of link failure, the port won't go + * into polling mode, but will wait to be re-enabled by software. + * 4 - Disabled by system. Can only be set by hardware. + * Access: RW + */ +MLXSW_ITEM32(reg, paos, admin_status, 0x00, 8, 4); + +/* reg_paos_oper_status + * Port operational state (the current state): + * 1 - Up. + * 2 - Down. + * 3 - Down by port failure. This means that the device will not let the + * port up again until explicitly specified by software. + * Access: RO + */ +MLXSW_ITEM32(reg, paos, oper_status, 0x00, 0, 4); + +/* reg_paos_ase + * Admin state update enabled. + * Access: WO + */ +MLXSW_ITEM32(reg, paos, ase, 0x04, 31, 1); + +/* reg_paos_ee + * Event update enable. If this bit is set, event generation will be + * updated based on the e field. + * Access: WO + */ +MLXSW_ITEM32(reg, paos, ee, 0x04, 30, 1); + +/* reg_paos_e + * Event generation on operational state change: + * 0 - Do not generate event. + * 1 - Generate Event. + * 2 - Generate Single Event. + * Access: RW + */ +MLXSW_ITEM32(reg, paos, e, 0x04, 0, 2); + +static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port, + enum mlxsw_port_admin_status status) +{ + MLXSW_REG_ZERO(paos, payload); + mlxsw_reg_paos_swid_set(payload, 0); + mlxsw_reg_paos_local_port_set(payload, local_port); + mlxsw_reg_paos_admin_status_set(payload, status); + mlxsw_reg_paos_oper_status_set(payload, 0); + mlxsw_reg_paos_ase_set(payload, 1); + mlxsw_reg_paos_ee_set(payload, 1); + mlxsw_reg_paos_e_set(payload, 1); +} + +/* PPCNT - Ports Performance Counters Register + * ------------------------------------------- + * The PPCNT register retrieves per port performance counters. + */ +#define MLXSW_REG_PPCNT_ID 0x5008 +#define MLXSW_REG_PPCNT_LEN 0x100 + +static const struct mlxsw_reg_info mlxsw_reg_ppcnt = { + .id = MLXSW_REG_PPCNT_ID, + .len = MLXSW_REG_PPCNT_LEN, +}; + +/* reg_ppcnt_swid + * For HCA: must be always 0. + * Switch partition ID to associate port with. + * Switch partitions are numbered from 0 to 7 inclusively. + * Switch partition 254 indicates stacking ports. + * Switch partition 255 indicates all switch partitions. + * Only valid on Set() operation with local_port=255. + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, swid, 0x00, 24, 8); + +/* reg_ppcnt_local_port + * Local port number. + * 255 indicates all ports on the device, and is only allowed + * for Set() operation. + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, local_port, 0x00, 16, 8); + +/* reg_ppcnt_pnat + * Port number access type: + * 0 - Local port number + * 1 - IB port number + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); + +/* reg_ppcnt_grp + * Performance counter group. + * Group 63 indicates all groups. Only valid on Set() operation with + * clr bit set. + * 0x0: IEEE 802.3 Counters + * 0x1: RFC 2863 Counters + * 0x2: RFC 2819 Counters + * 0x3: RFC 3635 Counters + * 0x5: Ethernet Extended Counters + * 0x8: Link Level Retransmission Counters + * 0x10: Per Priority Counters + * 0x11: Per Traffic Class Counters + * 0x12: Physical Layer Counters + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6); + +/* reg_ppcnt_clr + * Clear counters. Setting the clr bit will reset the counter value + * for all counters in the counter group. This bit can be set + * for both Set() and Get() operation. + * Access: OP + */ +MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1); + +/* reg_ppcnt_prio_tc + * Priority for counter set that support per priority, valid values: 0-7. + * Traffic class for counter set that support per traffic class, + * valid values: 0- cap_max_tclass-1 . + * For HCA: cap_max_tclass is always 8. + * Otherwise must be 0. + * Access: Index + */ +MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); + +/* reg_ppcnt_a_frames_transmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frames_transmitted_ok, + 0x08 + 0x00, 0, 64); + +/* reg_ppcnt_a_frames_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frames_received_ok, + 0x08 + 0x08, 0, 64); + +/* reg_ppcnt_a_frame_check_sequence_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frame_check_sequence_errors, + 0x08 + 0x10, 0, 64); + +/* reg_ppcnt_a_alignment_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_alignment_errors, + 0x08 + 0x18, 0, 64); + +/* reg_ppcnt_a_octets_transmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_octets_transmitted_ok, + 0x08 + 0x20, 0, 64); + +/* reg_ppcnt_a_octets_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_octets_received_ok, + 0x08 + 0x28, 0, 64); + +/* reg_ppcnt_a_multicast_frames_xmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_xmitted_ok, + 0x08 + 0x30, 0, 64); + +/* reg_ppcnt_a_broadcast_frames_xmitted_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_xmitted_ok, + 0x08 + 0x38, 0, 64); + +/* reg_ppcnt_a_multicast_frames_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_received_ok, + 0x08 + 0x40, 0, 64); + +/* reg_ppcnt_a_broadcast_frames_received_ok + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_received_ok, + 0x08 + 0x48, 0, 64); + +/* reg_ppcnt_a_in_range_length_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_in_range_length_errors, + 0x08 + 0x50, 0, 64); + +/* reg_ppcnt_a_out_of_range_length_field + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_out_of_range_length_field, + 0x08 + 0x58, 0, 64); + +/* reg_ppcnt_a_frame_too_long_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_frame_too_long_errors, + 0x08 + 0x60, 0, 64); + +/* reg_ppcnt_a_symbol_error_during_carrier + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_symbol_error_during_carrier, + 0x08 + 0x68, 0, 64); + +/* reg_ppcnt_a_mac_control_frames_transmitted + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_transmitted, + 0x08 + 0x70, 0, 64); + +/* reg_ppcnt_a_mac_control_frames_received + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_received, + 0x08 + 0x78, 0, 64); + +/* reg_ppcnt_a_unsupported_opcodes_received + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_unsupported_opcodes_received, + 0x08 + 0x80, 0, 64); + +/* reg_ppcnt_a_pause_mac_ctrl_frames_received + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, + 0x08 + 0x88, 0, 64); + +/* reg_ppcnt_a_pause_mac_ctrl_frames_transmitted + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, + 0x08 + 0x90, 0, 64); + +static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(ppcnt, payload); + mlxsw_reg_ppcnt_swid_set(payload, 0); + mlxsw_reg_ppcnt_local_port_set(payload, local_port); + mlxsw_reg_ppcnt_pnat_set(payload, 0); + mlxsw_reg_ppcnt_grp_set(payload, 0); + mlxsw_reg_ppcnt_clr_set(payload, 0); + mlxsw_reg_ppcnt_prio_tc_set(payload, 0); +} + +/* PBMC - Port Buffer Management Control Register + * ---------------------------------------------- + * The PBMC register configures and retrieves the port packet buffer + * allocation for different Prios, and the Pause threshold management. + */ +#define MLXSW_REG_PBMC_ID 0x500C +#define MLXSW_REG_PBMC_LEN 0x68 + +static const struct mlxsw_reg_info mlxsw_reg_pbmc = { + .id = MLXSW_REG_PBMC_ID, + .len = MLXSW_REG_PBMC_LEN, +}; + +/* reg_pbmc_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pbmc, local_port, 0x00, 16, 8); + +/* reg_pbmc_xoff_timer_value + * When device generates a pause frame, it uses this value as the pause + * timer (time for the peer port to pause in quota-512 bit time). + * Access: RW + */ +MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16); + +/* reg_pbmc_xoff_refresh + * The time before a new pause frame should be sent to refresh the pause RW + * state. Using the same units as xoff_timer_value above (in quota-512 bit + * time). + * Access: RW + */ +MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16); + +/* reg_pbmc_buf_lossy + * The field indicates if the buffer is lossy. + * 0 - Lossless + * 1 - Lossy + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_lossy, 0x0C, 25, 1, 0x08, 0x00, false); + +/* reg_pbmc_buf_epsb + * Eligible for Port Shared buffer. + * If epsb is set, packets assigned to buffer are allowed to insert the port + * shared buffer. + * When buf_lossy is MLXSW_REG_PBMC_LOSSY_LOSSY this field is reserved. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false); + +/* reg_pbmc_buf_size + * The part of the packet buffer array is allocated for the specific buffer. + * Units are represented in cells. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false); + +static inline void mlxsw_reg_pbmc_pack(char *payload, u8 local_port, + u16 xoff_timer_value, u16 xoff_refresh) +{ + MLXSW_REG_ZERO(pbmc, payload); + mlxsw_reg_pbmc_local_port_set(payload, local_port); + mlxsw_reg_pbmc_xoff_timer_value_set(payload, xoff_timer_value); + mlxsw_reg_pbmc_xoff_refresh_set(payload, xoff_refresh); +} + +static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload, + int buf_index, + u16 size) +{ + mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 1); + mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); +} + +/* PSPA - Port Switch Partition Allocation + * --------------------------------------- + * Controls the association of a port with a switch partition and enables + * configuring ports as stacking ports. + */ +#define MLXSW_REG_PSPA_ID 0x500D +#define MLXSW_REG_PSPA_LEN 0x8 + +static const struct mlxsw_reg_info mlxsw_reg_pspa = { + .id = MLXSW_REG_PSPA_ID, + .len = MLXSW_REG_PSPA_LEN, +}; + +/* reg_pspa_swid + * Switch partition ID. + * Access: RW + */ +MLXSW_ITEM32(reg, pspa, swid, 0x00, 24, 8); + +/* reg_pspa_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pspa, local_port, 0x00, 16, 8); + +/* reg_pspa_sub_port + * Virtual port within the local port. Set to 0 when virtual ports are + * disabled on the local port. + * Access: Index + */ +MLXSW_ITEM32(reg, pspa, sub_port, 0x00, 8, 8); + +static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) +{ + MLXSW_REG_ZERO(pspa, payload); + mlxsw_reg_pspa_swid_set(payload, swid); + mlxsw_reg_pspa_local_port_set(payload, local_port); + mlxsw_reg_pspa_sub_port_set(payload, 0); +} + +/* HTGT - Host Trap Group Table + * ---------------------------- + * Configures the properties for forwarding to CPU. + */ +#define MLXSW_REG_HTGT_ID 0x7002 +#define MLXSW_REG_HTGT_LEN 0x100 + +static const struct mlxsw_reg_info mlxsw_reg_htgt = { + .id = MLXSW_REG_HTGT_ID, + .len = MLXSW_REG_HTGT_LEN, +}; + +/* reg_htgt_swid + * Switch partition ID. + * Access: Index + */ +MLXSW_ITEM32(reg, htgt, swid, 0x00, 24, 8); + +#define MLXSW_REG_HTGT_PATH_TYPE_LOCAL 0x0 /* For locally attached CPU */ + +/* reg_htgt_type + * CPU path type. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4); + +enum mlxsw_reg_htgt_trap_group { + MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_RX, + MLXSW_REG_HTGT_TRAP_GROUP_CTRL, +}; + +/* reg_htgt_trap_group + * Trap group number. User defined number specifying which trap groups + * should be forwarded to the CPU. The mapping between trap IDs and trap + * groups is configured using HPKT register. + * Access: Index + */ +MLXSW_ITEM32(reg, htgt, trap_group, 0x00, 0, 8); + +enum { + MLXSW_REG_HTGT_POLICER_DISABLE, + MLXSW_REG_HTGT_POLICER_ENABLE, +}; + +/* reg_htgt_pide + * Enable policer ID specified using 'pid' field. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, pide, 0x04, 15, 1); + +/* reg_htgt_pid + * Policer ID for the trap group. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, pid, 0x04, 0, 8); + +#define MLXSW_REG_HTGT_TRAP_TO_CPU 0x0 + +/* reg_htgt_mirror_action + * Mirror action to use. + * 0 - Trap to CPU. + * 1 - Trap to CPU and mirror to a mirroring agent. + * 2 - Mirror to a mirroring agent and do not trap to CPU. + * Access: RW + * + * Note: Mirroring to a mirroring agent is only supported in Spectrum. + */ +MLXSW_ITEM32(reg, htgt, mirror_action, 0x08, 8, 2); + +/* reg_htgt_mirroring_agent + * Mirroring agent. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3); + +/* reg_htgt_priority + * Trap group priority. + * In case a packet matches multiple classification rules, the packet will + * only be trapped once, based on the trap ID associated with the group (via + * register HPKT) with the highest priority. + * Supported values are 0-7, with 7 represnting the highest priority. + * Access: RW + * + * Note: In SwitchX-2 this field is ignored and the priority value is replaced + * by the 'trap_group' field. + */ +MLXSW_ITEM32(reg, htgt, priority, 0x0C, 0, 4); + +/* reg_htgt_local_path_cpu_tclass + * CPU ingress traffic class for the trap group. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, local_path_cpu_tclass, 0x10, 16, 6); + +#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_EMAD 0x15 +#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_RX 0x14 +#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_CTRL 0x13 + +/* reg_htgt_local_path_rdq + * Receive descriptor queue (RDQ) to use for the trap group. + * Access: RW + */ +MLXSW_ITEM32(reg, htgt, local_path_rdq, 0x10, 0, 6); + +static inline void mlxsw_reg_htgt_pack(char *payload, + enum mlxsw_reg_htgt_trap_group group) +{ + u8 swid, rdq; + + MLXSW_REG_ZERO(htgt, payload); + switch (group) { + case MLXSW_REG_HTGT_TRAP_GROUP_EMAD: + swid = MLXSW_PORT_SWID_ALL_SWIDS; + rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_EMAD; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_RX: + swid = 0; + rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_RX; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_CTRL: + swid = 0; + rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_CTRL; + break; + } + mlxsw_reg_htgt_swid_set(payload, swid); + mlxsw_reg_htgt_type_set(payload, MLXSW_REG_HTGT_PATH_TYPE_LOCAL); + mlxsw_reg_htgt_trap_group_set(payload, group); + mlxsw_reg_htgt_pide_set(payload, MLXSW_REG_HTGT_POLICER_DISABLE); + mlxsw_reg_htgt_pid_set(payload, 0); + mlxsw_reg_htgt_mirror_action_set(payload, MLXSW_REG_HTGT_TRAP_TO_CPU); + mlxsw_reg_htgt_mirroring_agent_set(payload, 0); + mlxsw_reg_htgt_priority_set(payload, 0); + mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, 7); + mlxsw_reg_htgt_local_path_rdq_set(payload, rdq); +} + +/* HPKT - Host Packet Trap + * ----------------------- + * Configures trap IDs inside trap groups. + */ +#define MLXSW_REG_HPKT_ID 0x7003 +#define MLXSW_REG_HPKT_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_hpkt = { + .id = MLXSW_REG_HPKT_ID, + .len = MLXSW_REG_HPKT_LEN, +}; + +enum { + MLXSW_REG_HPKT_ACK_NOT_REQUIRED, + MLXSW_REG_HPKT_ACK_REQUIRED, +}; + +/* reg_hpkt_ack + * Require acknowledgements from the host for events. + * If set, then the device will wait for the event it sent to be acknowledged + * by the host. This option is only relevant for event trap IDs. + * Access: RW + * + * Note: Currently not supported by firmware. + */ +MLXSW_ITEM32(reg, hpkt, ack, 0x00, 24, 1); + +enum mlxsw_reg_hpkt_action { + MLXSW_REG_HPKT_ACTION_FORWARD, + MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + MLXSW_REG_HPKT_ACTION_MIRROR_TO_CPU, + MLXSW_REG_HPKT_ACTION_DISCARD, + MLXSW_REG_HPKT_ACTION_SOFT_DISCARD, + MLXSW_REG_HPKT_ACTION_TRAP_AND_SOFT_DISCARD, +}; + +/* reg_hpkt_action + * Action to perform on packet when trapped. + * 0 - No action. Forward to CPU based on switching rules. + * 1 - Trap to CPU (CPU receives sole copy). + * 2 - Mirror to CPU (CPU receives a replica of the packet). + * 3 - Discard. + * 4 - Soft discard (allow other traps to act on the packet). + * 5 - Trap and soft discard (allow other traps to overwrite this trap). + * Access: RW + * + * Note: Must be set to 0 (forward) for event trap IDs, as they are already + * addressed to the CPU. + */ +MLXSW_ITEM32(reg, hpkt, action, 0x00, 20, 3); + +/* reg_hpkt_trap_group + * Trap group to associate the trap with. + * Access: RW + */ +MLXSW_ITEM32(reg, hpkt, trap_group, 0x00, 12, 6); + +/* reg_hpkt_trap_id + * Trap ID. + * Access: Index + * + * Note: A trap ID can only be associated with a single trap group. The device + * will associate the trap ID with the last trap group configured. + */ +MLXSW_ITEM32(reg, hpkt, trap_id, 0x00, 0, 9); + +enum { + MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT, + MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER, + MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER, +}; + +/* reg_hpkt_ctrl + * Configure dedicated buffer resources for control packets. + * 0 - Keep factory defaults. + * 1 - Do not use control buffer for this trap ID. + * 2 - Use control buffer for this trap ID. + * Access: RW + */ +MLXSW_ITEM32(reg, hpkt, ctrl, 0x04, 16, 2); + +static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) +{ + enum mlxsw_reg_htgt_trap_group trap_group; + + MLXSW_REG_ZERO(hpkt, payload); + mlxsw_reg_hpkt_ack_set(payload, MLXSW_REG_HPKT_ACK_NOT_REQUIRED); + mlxsw_reg_hpkt_action_set(payload, action); + switch (trap_id) { + case MLXSW_TRAP_ID_ETHEMAD: + case MLXSW_TRAP_ID_PUDE: + trap_group = MLXSW_REG_HTGT_TRAP_GROUP_EMAD; + break; + default: + trap_group = MLXSW_REG_HTGT_TRAP_GROUP_RX; + break; + } + mlxsw_reg_hpkt_trap_group_set(payload, trap_group); + mlxsw_reg_hpkt_trap_id_set(payload, trap_id); + mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); +} + +/* SBPR - Shared Buffer Pools Register + * ----------------------------------- + * The SBPR configures and retrieves the shared buffer pools and configuration. + */ +#define MLXSW_REG_SBPR_ID 0xB001 +#define MLXSW_REG_SBPR_LEN 0x14 + +static const struct mlxsw_reg_info mlxsw_reg_sbpr = { + .id = MLXSW_REG_SBPR_ID, + .len = MLXSW_REG_SBPR_LEN, +}; + +enum mlxsw_reg_sbpr_dir { + MLXSW_REG_SBPR_DIR_INGRESS, + MLXSW_REG_SBPR_DIR_EGRESS, +}; + +/* reg_sbpr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2); + +/* reg_sbpr_pool + * Pool index. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4); + +/* reg_sbpr_size + * Pool size in buffer cells. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24); + +enum mlxsw_reg_sbpr_mode { + MLXSW_REG_SBPR_MODE_STATIC, + MLXSW_REG_SBPR_MODE_DYNAMIC, +}; + +/* reg_sbpr_mode + * Pool quota calculation mode. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4); + +static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, + enum mlxsw_reg_sbpr_dir dir, + enum mlxsw_reg_sbpr_mode mode, u32 size) +{ + MLXSW_REG_ZERO(sbpr, payload); + mlxsw_reg_sbpr_pool_set(payload, pool); + mlxsw_reg_sbpr_dir_set(payload, dir); + mlxsw_reg_sbpr_mode_set(payload, mode); + mlxsw_reg_sbpr_size_set(payload, size); +} + +/* SBCM - Shared Buffer Class Management Register + * ---------------------------------------------- + * The SBCM register configures and retrieves the shared buffer allocation + * and configuration according to Port-PG, including the binding to pool + * and definition of the associated quota. + */ +#define MLXSW_REG_SBCM_ID 0xB002 +#define MLXSW_REG_SBCM_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_sbcm = { + .id = MLXSW_REG_SBCM_ID, + .len = MLXSW_REG_SBCM_LEN, +}; + +/* reg_sbcm_local_port + * Local port number. + * For Ingress: excludes CPU port and Router port + * For Egress: excludes IP Router + * Access: Index + */ +MLXSW_ITEM32(reg, sbcm, local_port, 0x00, 16, 8); + +/* reg_sbcm_pg_buff + * PG buffer - Port PG (dir=ingress) / traffic class (dir=egress) + * For PG buffer: range is 0..cap_max_pg_buffers - 1 + * For traffic class: range is 0..cap_max_tclass - 1 + * Note that when traffic class is in MC aware mode then the traffic + * classes which are MC aware cannot be configured. + * Access: Index + */ +MLXSW_ITEM32(reg, sbcm, pg_buff, 0x00, 8, 6); + +enum mlxsw_reg_sbcm_dir { + MLXSW_REG_SBCM_DIR_INGRESS, + MLXSW_REG_SBCM_DIR_EGRESS, +}; + +/* reg_sbcm_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2); + +/* reg_sbcm_min_buff + * Minimum buffer size for the limiter, in cells. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); + +/* reg_sbcm_max_buff + * When the pool associated to the port-pg/tclass is configured to + * static, Maximum buffer size for the limiter configured in cells. + * When the pool associated to the port-pg/tclass is configured to + * dynamic, the max_buff holds the "alpha" parameter, supporting + * the following values: + * 0: 0 + * i: (1/128)*2^(i-1), for i=1..14 + * 0xFF: Infinity + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24); + +/* reg_sbcm_pool + * Association of the port-priority to a pool. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4); + +static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, + enum mlxsw_reg_sbcm_dir dir, + u32 min_buff, u32 max_buff, u8 pool) +{ + MLXSW_REG_ZERO(sbcm, payload); + mlxsw_reg_sbcm_local_port_set(payload, local_port); + mlxsw_reg_sbcm_pg_buff_set(payload, pg_buff); + mlxsw_reg_sbcm_dir_set(payload, dir); + mlxsw_reg_sbcm_min_buff_set(payload, min_buff); + mlxsw_reg_sbcm_max_buff_set(payload, max_buff); + mlxsw_reg_sbcm_pool_set(payload, pool); +} + +/* SBPM - Shared Buffer Class Management Register + * ---------------------------------------------- + * The SBPM register configures and retrieves the shared buffer allocation + * and configuration according to Port-Pool, including the definition + * of the associated quota. + */ +#define MLXSW_REG_SBPM_ID 0xB003 +#define MLXSW_REG_SBPM_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_sbpm = { + .id = MLXSW_REG_SBPM_ID, + .len = MLXSW_REG_SBPM_LEN, +}; + +/* reg_sbpm_local_port + * Local port number. + * For Ingress: excludes CPU port and Router port + * For Egress: excludes IP Router + * Access: Index + */ +MLXSW_ITEM32(reg, sbpm, local_port, 0x00, 16, 8); + +/* reg_sbpm_pool + * The pool associated to quota counting on the local_port. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4); + +enum mlxsw_reg_sbpm_dir { + MLXSW_REG_SBPM_DIR_INGRESS, + MLXSW_REG_SBPM_DIR_EGRESS, +}; + +/* reg_sbpm_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2); + +/* reg_sbpm_min_buff + * Minimum buffer size for the limiter, in cells. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24); + +/* reg_sbpm_max_buff + * When the pool associated to the port-pg/tclass is configured to + * static, Maximum buffer size for the limiter configured in cells. + * When the pool associated to the port-pg/tclass is configured to + * dynamic, the max_buff holds the "alpha" parameter, supporting + * the following values: + * 0: 0 + * i: (1/128)*2^(i-1), for i=1..14 + * 0xFF: Infinity + * Access: RW + */ +MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24); + +static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool, + enum mlxsw_reg_sbpm_dir dir, + u32 min_buff, u32 max_buff) +{ + MLXSW_REG_ZERO(sbpm, payload); + mlxsw_reg_sbpm_local_port_set(payload, local_port); + mlxsw_reg_sbpm_pool_set(payload, pool); + mlxsw_reg_sbpm_dir_set(payload, dir); + mlxsw_reg_sbpm_min_buff_set(payload, min_buff); + mlxsw_reg_sbpm_max_buff_set(payload, max_buff); +} + +/* SBMM - Shared Buffer Multicast Management Register + * -------------------------------------------------- + * The SBMM register configures and retrieves the shared buffer allocation + * and configuration for MC packets according to Switch-Priority, including + * the binding to pool and definition of the associated quota. + */ +#define MLXSW_REG_SBMM_ID 0xB004 +#define MLXSW_REG_SBMM_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_sbmm = { + .id = MLXSW_REG_SBMM_ID, + .len = MLXSW_REG_SBMM_LEN, +}; + +/* reg_sbmm_prio + * Switch Priority. + * Access: Index + */ +MLXSW_ITEM32(reg, sbmm, prio, 0x00, 8, 4); + +/* reg_sbmm_min_buff + * Minimum buffer size for the limiter, in cells. + * Access: RW + */ +MLXSW_ITEM32(reg, sbmm, min_buff, 0x18, 0, 24); + +/* reg_sbmm_max_buff + * When the pool associated to the port-pg/tclass is configured to + * static, Maximum buffer size for the limiter configured in cells. + * When the pool associated to the port-pg/tclass is configured to + * dynamic, the max_buff holds the "alpha" parameter, supporting + * the following values: + * 0: 0 + * i: (1/128)*2^(i-1), for i=1..14 + * 0xFF: Infinity + * Access: RW + */ +MLXSW_ITEM32(reg, sbmm, max_buff, 0x1C, 0, 24); + +/* reg_sbmm_pool + * Association of the port-priority to a pool. + * Access: RW + */ +MLXSW_ITEM32(reg, sbmm, pool, 0x24, 0, 4); + +static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff, + u32 max_buff, u8 pool) +{ + MLXSW_REG_ZERO(sbmm, payload); + mlxsw_reg_sbmm_prio_set(payload, prio); + mlxsw_reg_sbmm_min_buff_set(payload, min_buff); + mlxsw_reg_sbmm_max_buff_set(payload, max_buff); + mlxsw_reg_sbmm_pool_set(payload, pool); +} + +static inline const char *mlxsw_reg_id_str(u16 reg_id) +{ + switch (reg_id) { + case MLXSW_REG_SGCR_ID: + return "SGCR"; + case MLXSW_REG_SPAD_ID: + return "SPAD"; + case MLXSW_REG_SSPR_ID: + return "SSPR"; + case MLXSW_REG_SFDAT_ID: + return "SFDAT"; + case MLXSW_REG_SFD_ID: + return "SFD"; + case MLXSW_REG_SFN_ID: + return "SFN"; + case MLXSW_REG_SPMS_ID: + return "SPMS"; + case MLXSW_REG_SPVID_ID: + return "SPVID"; + case MLXSW_REG_SPVM_ID: + return "SPVM"; + case MLXSW_REG_SFGC_ID: + return "SFGC"; + case MLXSW_REG_SFTR_ID: + return "SFTR"; + case MLXSW_REG_SPMLR_ID: + return "SPMLR"; + case MLXSW_REG_SVFA_ID: + return "SVFA"; + case MLXSW_REG_SVPE_ID: + return "SVPE"; + case MLXSW_REG_SFMR_ID: + return "SFMR"; + case MLXSW_REG_SPVMLR_ID: + return "SPVMLR"; + case MLXSW_REG_PMLP_ID: + return "PMLP"; + case MLXSW_REG_PMTU_ID: + return "PMTU"; + case MLXSW_REG_PTYS_ID: + return "PTYS"; + case MLXSW_REG_PPAD_ID: + return "PPAD"; + case MLXSW_REG_PAOS_ID: + return "PAOS"; + case MLXSW_REG_PPCNT_ID: + return "PPCNT"; + case MLXSW_REG_PBMC_ID: + return "PBMC"; + case MLXSW_REG_PSPA_ID: + return "PSPA"; + case MLXSW_REG_HTGT_ID: + return "HTGT"; + case MLXSW_REG_HPKT_ID: + return "HPKT"; + case MLXSW_REG_SBPR_ID: + return "SBPR"; + case MLXSW_REG_SBCM_ID: + return "SBCM"; + case MLXSW_REG_SBPM_ID: + return "SBPM"; + case MLXSW_REG_SBMM_ID: + return "SBMM"; + default: + return "*UNKNOWN*"; + } +} + +/* PUDE - Port Up / Down Event + * --------------------------- + * Reports the operational state change of a port. + */ +#define MLXSW_REG_PUDE_LEN 0x10 + +/* reg_pude_swid + * Switch partition ID with which to associate the port. + * Access: Index + */ +MLXSW_ITEM32(reg, pude, swid, 0x00, 24, 8); + +/* reg_pude_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pude, local_port, 0x00, 16, 8); + +/* reg_pude_admin_status + * Port administrative state (the desired state). + * 1 - Up. + * 2 - Down. + * 3 - Up once. This means that in case of link failure, the port won't go + * into polling mode, but will wait to be re-enabled by software. + * 4 - Disabled by system. Can only be set by hardware. + * Access: RO + */ +MLXSW_ITEM32(reg, pude, admin_status, 0x00, 8, 4); + +/* reg_pude_oper_status + * Port operatioanl state. + * 1 - Up. + * 2 - Down. + * 3 - Down by port failure. This means that the device will not let the + * port up again until explicitly specified by software. + * Access: RO + */ +MLXSW_ITEM32(reg, pude, oper_status, 0x00, 0, 4); + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum.c new file mode 100644 index 000000000..3be4a2355 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -0,0 +1,1949 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> +#include <linux/bitops.h> +#include <net/switchdev.h> +#include <generated/utsrelease.h> + +#include "spectrum.h" +#include "core.h" +#include "reg.h" +#include "port.h" +#include "trap.h" +#include "txheader.h" + +static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum"; +static const char mlxsw_sp_driver_version[] = "1.0"; + +/* tx_hdr_version + * Tx header version. + * Must be set to 1. + */ +MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4); + +/* tx_hdr_ctl + * Packet control type. + * 0 - Ethernet control (e.g. EMADs, LACP) + * 1 - Ethernet data + */ +MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2); + +/* tx_hdr_proto + * Packet protocol type. Must be set to 1 (Ethernet). + */ +MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3); + +/* tx_hdr_rx_is_router + * Packet is sent from the router. Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, rx_is_router, 0x00, 19, 1); + +/* tx_hdr_fid_valid + * Indicates if the 'fid' field is valid and should be used for + * forwarding lookup. Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, fid_valid, 0x00, 16, 1); + +/* tx_hdr_swid + * Switch partition ID. Must be set to 0. + */ +MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3); + +/* tx_hdr_control_tclass + * Indicates if the packet should use the control TClass and not one + * of the data TClasses. + */ +MLXSW_ITEM32(tx, hdr, control_tclass, 0x00, 6, 1); + +/* tx_hdr_etclass + * Egress TClass to be used on the egress device on the egress port. + */ +MLXSW_ITEM32(tx, hdr, etclass, 0x00, 0, 4); + +/* tx_hdr_port_mid + * Destination local port for unicast packets. + * Destination multicast ID for multicast packets. + * + * Control packets are directed to a specific egress port, while data + * packets are transmitted through the CPU port (0) into the switch partition, + * where forwarding rules are applied. + */ +MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16); + +/* tx_hdr_fid + * Forwarding ID used for L2 forwarding lookup. Valid only if 'fid_valid' is + * set, otherwise calculated based on the packet's VID using VID to FID mapping. + * Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); + +/* tx_hdr_type + * 0 - Data packets + * 6 - Control packets + */ +MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); + +static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); + mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL); + mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_hdr_swid_set(txhdr, 0); + mlxsw_tx_hdr_control_tclass_set(txhdr, 1); + mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); +} + +static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) +{ + char spad_pl[MLXSW_REG_SPAD_LEN]; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(spad), spad_pl); + if (err) + return err; + mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sp->base_mac); + return 0; +} + +static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool is_up) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char paos_pl[MLXSW_REG_PAOS_LEN]; + + mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, + is_up ? MLXSW_PORT_ADMIN_STATUS_UP : + MLXSW_PORT_ADMIN_STATUS_DOWN); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); +} + +static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port, + bool *p_is_up) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char paos_pl[MLXSW_REG_PAOS_LEN]; + u8 oper_status; + int err; + + mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); + if (err) + return err; + oper_status = mlxsw_reg_paos_oper_status_get(paos_pl); + *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false; + return 0; +} + +static int mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + int err; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, + MLXSW_SP_VFID_BASE + vfid, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); + + if (err) + return err; + + set_bit(vfid, mlxsw_sp->active_vfids); + return 0; +} + +static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + clear_bit(vfid, mlxsw_sp->active_vfids); + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, + MLXSW_SP_VFID_BASE + vfid, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned char *addr) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ppad_pl[MLXSW_REG_PPAD_LEN]; + + mlxsw_reg_ppad_pack(ppad_pl, true, mlxsw_sp_port->local_port); + mlxsw_reg_ppad_mac_memcpy_to(ppad_pl, addr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppad), ppad_pl); +} + +static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + unsigned char *addr = mlxsw_sp_port->dev->dev_addr; + + ether_addr_copy(addr, mlxsw_sp->base_mac); + addr[ETH_ALEN - 1] += mlxsw_sp_port->local_port; + return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); +} + +static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, enum mlxsw_reg_spms_state state) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spms_pl; + int err; + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + mlxsw_reg_spms_vid_pack(spms_pl, vid, state); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + int max_mtu; + int err; + + mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); + if (err) + return err; + max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); + + if (mtu > max_mtu) + return -EINVAL; + + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); +} + +static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pspa_pl[MLXSW_REG_PSPA_LEN]; + + mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); +} + +static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char svpe_pl[MLXSW_REG_SVPE_LEN]; + + mlxsw_reg_svpe_pack(svpe_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svpe), svpe_pl); +} + +int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_svfa_mt mt, bool valid, u16 fid, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + + mlxsw_reg_svfa_pack(svfa_pl, mlxsw_sp_port->local_port, mt, valid, + fid, vid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, bool learn_enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spvmlr_pl; + int err; + + spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL); + if (!spvmlr_pl) + return -ENOMEM; + mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid, + learn_enable); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl); + kfree(spvmlr_pl); + return err; +} + +static int +mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sspr_pl[MLXSW_REG_SSPR_LEN]; + + mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); +} + +static int mlxsw_sp_port_module_check(struct mlxsw_sp_port *mlxsw_sp_port, + bool *p_usable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + *p_usable = mlxsw_reg_pmlp_width_get(pmlp_pl) ? true : false; + return 0; +} + +static int mlxsw_sp_port_open(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); + if (err) + return err; + netif_start_queue(dev); + return 0; +} + +static int mlxsw_sp_port_stop(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + netif_stop_queue(dev); + return mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); +} + +static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + const struct mlxsw_tx_info tx_info = { + .local_port = mlxsw_sp_port->local_port, + .is_emad = false, + }; + u64 len; + int err; + + if (mlxsw_core_skb_transmit_busy(mlxsw_sp, &tx_info)) + return NETDEV_TX_BUSY; + + if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { + struct sk_buff *skb_orig = skb; + + skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); + if (!skb) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb_orig); + return NETDEV_TX_OK; + } + } + + if (eth_skb_pad(skb)) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + return NETDEV_TX_OK; + } + + mlxsw_sp_txhdr_construct(skb, &tx_info); + len = skb->len; + /* Due to a race we might fail here because of a full queue. In that + * unlikely case we simply drop the packet. + */ + err = mlxsw_core_skb_transmit(mlxsw_sp, skb, &tx_info); + + if (!err) { + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + } + return NETDEV_TX_OK; +} + +static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct sockaddr *addr = p; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + err = mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr->sa_data); + if (err) + return err; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + if (err) + return err; + dev->mtu = mtu; + return 0; +} + +static struct rtnl_link_stats64 * +mlxsw_sp_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp_port_pcpu_stats *p; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + u32 tx_dropped = 0; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + p = per_cpu_ptr(mlxsw_sp_port->pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rx_packets = p->rx_packets; + rx_bytes = p->rx_bytes; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + /* tx_dropped is u32, updated without syncp protection. */ + tx_dropped += p->tx_dropped; + } + stats->tx_dropped = tx_dropped; + return stats; +} + +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *spvm_pl; + int err; + + spvm_pl = kmalloc(MLXSW_REG_SPVM_LEN, GFP_KERNEL); + if (!spvm_pl) + return -ENOMEM; + + mlxsw_reg_spvm_pack(spvm_pl, mlxsw_sp_port->local_port, vid_begin, + vid_end, is_member, untagged); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvm), spvm_pl); + kfree(spvm_pl); + return err; +} + +static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + u16 vid, last_visited_vid; + int err; + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, true, vid, + vid); + if (err) { + last_visited_vid = vid; + goto err_port_vid_to_fid_set; + } + } + + err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); + if (err) { + last_visited_vid = VLAN_N_VID; + goto err_port_vid_to_fid_set; + } + + return 0; + +err_port_vid_to_fid_set: + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, last_visited_vid) + mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, vid, + vid); + return err; +} + +static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + u16 vid; + int err; + + err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); + if (err) + return err; + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, + vid, vid); + if (err) + return err; + } + + return 0; +} + +int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, + u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *sftr_pl; + int err; + + /* VLAN 0 is added to HW filter when device goes up, but it is + * reserved in our case, so simply return. + */ + if (!vid) + return 0; + + if (test_bit(vid, mlxsw_sp_port->active_vfids)) { + netdev_warn(dev, "VID=%d already configured\n", vid); + return 0; + } + + if (!test_bit(vid, mlxsw_sp->active_vfids)) { + err = mlxsw_sp_vfid_create(mlxsw_sp, vid); + if (err) { + netdev_err(dev, "Failed to create vFID=%d\n", + MLXSW_SP_VFID_BASE + vid); + return err; + } + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) { + err = -ENOMEM; + goto err_flood_table_alloc; + } + mlxsw_reg_sftr_pack(sftr_pl, 0, vid, + MLXSW_REG_SFGC_TABLE_TYPE_FID, 0, + MLXSW_PORT_CPU_PORT, true); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + kfree(sftr_pl); + if (err) { + netdev_err(dev, "Failed to configure flood table\n"); + goto err_flood_table_config; + } + } + + /* In case we fail in the following steps, we intentionally do not + * destroy the associated vFID. + */ + + /* When adding the first VLAN interface on a bridged port we need to + * transition all the active 802.1Q bridge VLANs to use explicit + * {Port, VID} to FID mappings and set the port's mode to Virtual mode. + */ + if (!mlxsw_sp_port->nr_vfids) { + err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); + if (err) { + netdev_err(dev, "Failed to set to Virtual mode\n"); + return err; + } + } + + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + true, MLXSW_SP_VFID_BASE + vid, vid); + if (err) { + netdev_err(dev, "Failed to map {Port, VID=%d} to vFID=%d\n", + vid, MLXSW_SP_VFID_BASE + vid); + goto err_port_vid_to_fid_set; + } + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); + if (err) { + netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); + goto err_port_vid_learning_set; + } + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, false); + if (err) { + netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", + vid); + goto err_port_add_vid; + } + + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid, + MLXSW_REG_SPMS_STATE_FORWARDING); + if (err) { + netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); + goto err_port_stp_state_set; + } + + mlxsw_sp_port->nr_vfids++; + set_bit(vid, mlxsw_sp_port->active_vfids); + + return 0; + +err_flood_table_config: +err_flood_table_alloc: + mlxsw_sp_vfid_destroy(mlxsw_sp, vid); + return err; + +err_port_stp_state_set: + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); +err_port_add_vid: + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); +err_port_vid_learning_set: + mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, + MLXSW_SP_VFID_BASE + vid, vid); +err_port_vid_to_fid_set: + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + return err; +} + +int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + /* VLAN 0 is removed from HW filter when device goes down, but + * it is reserved in our case, so simply return. + */ + if (!vid) + return 0; + + if (!test_bit(vid, mlxsw_sp_port->active_vfids)) { + netdev_warn(dev, "VID=%d does not exist\n", vid); + return 0; + } + + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, vid, + MLXSW_REG_SPMS_STATE_DISCARDING); + if (err) { + netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); + return err; + } + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); + if (err) { + netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", + vid); + return err; + } + + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); + if (err) { + netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); + return err; + } + + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, + MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + false, MLXSW_SP_VFID_BASE + vid, + vid); + if (err) { + netdev_err(dev, "Failed to invalidate {Port, VID=%d} to vFID=%d mapping\n", + vid, MLXSW_SP_VFID_BASE + vid); + return err; + } + + /* When removing the last VLAN interface on a bridged port we need to + * transition all active 802.1Q bridge VLANs to use VID to FID + * mappings and set port's mode to VLAN mode. + */ + if (mlxsw_sp_port->nr_vfids == 1) { + err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + if (err) { + netdev_err(dev, "Failed to set to VLAN mode\n"); + return err; + } + } + + mlxsw_sp_port->nr_vfids--; + clear_bit(vid, mlxsw_sp_port->active_vfids); + + return 0; +} + +static const struct net_device_ops mlxsw_sp_port_netdev_ops = { + .ndo_open = mlxsw_sp_port_open, + .ndo_stop = mlxsw_sp_port_stop, + .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, + .ndo_change_mtu = mlxsw_sp_port_change_mtu, + .ndo_get_stats64 = mlxsw_sp_port_get_stats64, + .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, + .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, +}; + +static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + strlcpy(drvinfo->driver, mlxsw_sp_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, mlxsw_sp_driver_version, + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_sp->bus_info->fw_rev.major, + mlxsw_sp->bus_info->fw_rev.minor, + mlxsw_sp->bus_info->fw_rev.subminor); + strlcpy(drvinfo->bus_info, mlxsw_sp->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + +struct mlxsw_sp_port_hw_stats { + char str[ETH_GSTRING_LEN]; + u64 (*getter)(char *payload); +}; + +static const struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { + { + .str = "a_frames_transmitted_ok", + .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get, + }, + { + .str = "a_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_frames_received_ok_get, + }, + { + .str = "a_frame_check_sequence_errors", + .getter = mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get, + }, + { + .str = "a_alignment_errors", + .getter = mlxsw_reg_ppcnt_a_alignment_errors_get, + }, + { + .str = "a_octets_transmitted_ok", + .getter = mlxsw_reg_ppcnt_a_octets_transmitted_ok_get, + }, + { + .str = "a_octets_received_ok", + .getter = mlxsw_reg_ppcnt_a_octets_received_ok_get, + }, + { + .str = "a_multicast_frames_xmitted_ok", + .getter = mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get, + }, + { + .str = "a_broadcast_frames_xmitted_ok", + .getter = mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get, + }, + { + .str = "a_multicast_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get, + }, + { + .str = "a_broadcast_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get, + }, + { + .str = "a_in_range_length_errors", + .getter = mlxsw_reg_ppcnt_a_in_range_length_errors_get, + }, + { + .str = "a_out_of_range_length_field", + .getter = mlxsw_reg_ppcnt_a_out_of_range_length_field_get, + }, + { + .str = "a_frame_too_long_errors", + .getter = mlxsw_reg_ppcnt_a_frame_too_long_errors_get, + }, + { + .str = "a_symbol_error_during_carrier", + .getter = mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get, + }, + { + .str = "a_mac_control_frames_transmitted", + .getter = mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get, + }, + { + .str = "a_mac_control_frames_received", + .getter = mlxsw_reg_ppcnt_a_mac_control_frames_received_get, + }, + { + .str = "a_unsupported_opcodes_received", + .getter = mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get, + }, + { + .str = "a_pause_mac_ctrl_frames_received", + .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_received_get, + }, + { + .str = "a_pause_mac_ctrl_frames_xmitted", + .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_transmitted_get, + }, +}; + +#define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats) + +static void mlxsw_sp_port_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static void mlxsw_sp_port_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int i; + int err; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) + data[i] = !err ? mlxsw_sp_port_hw_stats[i].getter(ppcnt_pl) : 0; +} + +static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return MLXSW_SP_PORT_HW_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +struct mlxsw_sp_port_link_mode { + u32 mask; + u32 supported; + u32 advertised; + u32 speed; +}; + +static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, + .supported = SUPPORTED_100baseT_Full, + .advertised = ADVERTISED_100baseT_Full, + .speed = 100, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, + .speed = 100, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | + MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, + .supported = SUPPORTED_10000baseT_Full, + .advertised = ADVERTISED_10000baseT_Full, + .speed = 10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR, + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, + .supported = SUPPORTED_20000baseKR2_Full, + .advertised = ADVERTISED_20000baseKR2_Full, + .speed = 20000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, + .supported = SUPPORTED_40000baseCR4_Full, + .advertised = ADVERTISED_40000baseCR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4, + .supported = SUPPORTED_40000baseKR4_Full, + .advertised = ADVERTISED_40000baseKR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4, + .supported = SUPPORTED_40000baseSR4_Full, + .advertised = ADVERTISED_40000baseSR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4, + .supported = SUPPORTED_40000baseLR4_Full, + .advertised = ADVERTISED_40000baseLR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .speed = 25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 | + MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, + .speed = 50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .supported = SUPPORTED_56000baseKR4_Full, + .advertised = ADVERTISED_56000baseKR4_Full, + .speed = 56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .speed = 100000, + }, +}; + +#define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode) + +static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) +{ + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_SGMII)) + return SUPPORTED_FIBRE; + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) + return SUPPORTED_Backplane; + return 0; +} + +static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto) +{ + u32 modes = 0; + int i; + + for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) + modes |= mlxsw_sp_port_link_mode[i].supported; + } + return modes; +} + +static u32 mlxsw_sp_from_ptys_advert_link(u32 ptys_eth_proto) +{ + u32 modes = 0; + int i; + + for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) + modes |= mlxsw_sp_port_link_mode[i].advertised; + } + return modes; +} + +static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, + struct ethtool_cmd *cmd) +{ + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + int i; + + if (!carrier_ok) + goto out; + + for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) { + speed = mlxsw_sp_port_link_mode[i].speed; + duplex = DUPLEX_FULL; + break; + } + } +out: + ethtool_cmd_speed_set(cmd, speed); + cmd->duplex = duplex; +} + +static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) +{ + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_SGMII)) + return PORT_FIBRE; + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4)) + return PORT_DA; + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4)) + return PORT_NONE; + + return PORT_OTHER; +} + +static int mlxsw_sp_port_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_oper; + int err; + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) { + netdev_err(dev, "Failed to get proto"); + return err; + } + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, + ð_proto_admin, ð_proto_oper); + + cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | + mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | + SUPPORTED_Pause | SUPPORTED_Asym_Pause; + cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin); + mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), + eth_proto_oper, cmd); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); + cmd->lp_advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_oper); + + cmd->transceiver = XCVR_INTERNAL; + return 0; +} + +static u32 mlxsw_sp_to_ptys_advert_link(u32 advertising) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { + if (advertising & mlxsw_sp_port_link_mode[i].advertised) + ptys_proto |= mlxsw_sp_port_link_mode[i].mask; + } + return ptys_proto; +} + +static u32 mlxsw_sp_to_ptys_speed(u32 speed) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { + if (speed == mlxsw_sp_port_link_mode[i].speed) + ptys_proto |= mlxsw_sp_port_link_mode[i].mask; + } + return ptys_proto; +} + +static int mlxsw_sp_port_set_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 speed; + u32 eth_proto_new; + u32 eth_proto_cap; + u32 eth_proto_admin; + bool is_up; + int err; + + speed = ethtool_cmd_speed(cmd); + + eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ? + mlxsw_sp_to_ptys_advert_link(cmd->advertising) : + mlxsw_sp_to_ptys_speed(speed); + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) { + netdev_err(dev, "Failed to get proto"); + return err; + } + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, NULL); + + eth_proto_new = eth_proto_new & eth_proto_cap; + if (!eth_proto_new) { + netdev_err(dev, "Not supported proto admin requested"); + return -EINVAL; + } + if (eth_proto_new == eth_proto_admin) + return 0; + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) { + netdev_err(dev, "Failed to set proto admin"); + return err; + } + + err = mlxsw_sp_port_oper_status_get(mlxsw_sp_port, &is_up); + if (err) { + netdev_err(dev, "Failed to get oper status"); + return err; + } + if (!is_up) + return 0; + + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + if (err) { + netdev_err(dev, "Failed to set admin status"); + return err; + } + + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); + if (err) { + netdev_err(dev, "Failed to set admin status"); + return err; + } + + return 0; +} + +static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { + .get_drvinfo = mlxsw_sp_port_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlxsw_sp_port_get_strings, + .get_ethtool_stats = mlxsw_sp_port_get_stats, + .get_sset_count = mlxsw_sp_port_get_sset_count, + .get_settings = mlxsw_sp_port_get_settings, + .set_settings = mlxsw_sp_port_set_settings, +}; + +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *dev; + bool usable; + int err; + + dev = alloc_etherdev(sizeof(struct mlxsw_sp_port)); + if (!dev) + return -ENOMEM; + mlxsw_sp_port = netdev_priv(dev); + mlxsw_sp_port->dev = dev; + mlxsw_sp_port->mlxsw_sp = mlxsw_sp; + mlxsw_sp_port->local_port = local_port; + mlxsw_sp_port->learning = 1; + mlxsw_sp_port->learning_sync = 1; + mlxsw_sp_port->uc_flood = 1; + mlxsw_sp_port->pvid = 1; + + mlxsw_sp_port->pcpu_stats = + netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats); + if (!mlxsw_sp_port->pcpu_stats) { + err = -ENOMEM; + goto err_alloc_stats; + } + + dev->netdev_ops = &mlxsw_sp_port_netdev_ops; + dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; + + err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n", + mlxsw_sp_port->local_port); + goto err_dev_addr_init; + } + + netif_carrier_off(dev); + + dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_FILTER; + + /* Each packet needs to have a Tx header (metadata) on top all other + * headers. + */ + dev->hard_header_len += MLXSW_TXHDR_LEN; + + err = mlxsw_sp_port_module_check(mlxsw_sp_port, &usable); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to check module\n", + mlxsw_sp_port->local_port); + goto err_port_module_check; + } + + if (!usable) { + dev_dbg(mlxsw_sp->bus_info->dev, "Port %d: Not usable, skipping initialization\n", + mlxsw_sp_port->local_port); + goto port_not_usable; + } + + err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set system port mapping\n", + mlxsw_sp_port->local_port); + goto err_port_system_port_mapping_set; + } + + err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", + mlxsw_sp_port->local_port); + goto err_port_swid_set; + } + + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", + mlxsw_sp_port->local_port); + goto err_port_mtu_set; + } + + err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + if (err) + goto err_port_admin_status_set; + + err = mlxsw_sp_port_buffers_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize buffers\n", + mlxsw_sp_port->local_port); + goto err_port_buffers_init; + } + + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); + err = register_netdev(dev); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", + mlxsw_sp_port->local_port); + goto err_register_netdev; + } + + err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); + if (err) + goto err_port_vlan_init; + + mlxsw_sp->ports[local_port] = mlxsw_sp_port; + return 0; + +err_port_vlan_init: + unregister_netdev(dev); +err_register_netdev: +err_port_buffers_init: +err_port_admin_status_set: +err_port_mtu_set: +err_port_swid_set: +err_port_system_port_mapping_set: +port_not_usable: +err_port_module_check: +err_dev_addr_init: + free_percpu(mlxsw_sp_port->pcpu_stats); +err_alloc_stats: + free_netdev(dev); + return err; +} + +static void mlxsw_sp_vfids_fini(struct mlxsw_sp *mlxsw_sp) +{ + u16 vfid; + + for_each_set_bit(vfid, mlxsw_sp->active_vfids, VLAN_N_VID) + mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); +} + +static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + if (!mlxsw_sp_port) + return; + mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); + unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); + free_percpu(mlxsw_sp_port->pcpu_stats); + free_netdev(mlxsw_sp_port->dev); +} + +static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + mlxsw_sp_port_remove(mlxsw_sp, i); + kfree(mlxsw_sp->ports); +} + +static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) +{ + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(struct mlxsw_sp_port *) * MLXSW_PORT_MAX_PORTS; + mlxsw_sp->ports = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_sp->ports) + return -ENOMEM; + + for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { + err = mlxsw_sp_port_create(mlxsw_sp, i); + if (err) + goto err_port_create; + } + return 0; + +err_port_create: + for (i--; i >= 1; i--) + mlxsw_sp_port_remove(mlxsw_sp, i); + kfree(mlxsw_sp->ports); + return err; +} + +static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, + char *pude_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port; + enum mlxsw_reg_pude_oper_status status; + u8 local_port; + + local_port = mlxsw_reg_pude_local_port_get(pude_pl); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) { + dev_warn(mlxsw_sp->bus_info->dev, "Port %d: Link event received for non-existent port\n", + local_port); + return; + } + + status = mlxsw_reg_pude_oper_status_get(pude_pl); + if (status == MLXSW_PORT_OPER_STATUS_UP) { + netdev_info(mlxsw_sp_port->dev, "link up\n"); + netif_carrier_on(mlxsw_sp_port->dev); + } else { + netdev_info(mlxsw_sp_port->dev, "link down\n"); + netif_carrier_off(mlxsw_sp_port->dev); + } +} + +static struct mlxsw_event_listener mlxsw_sp_pude_event = { + .func = mlxsw_sp_pude_event_func, + .trap_id = MLXSW_TRAP_ID_PUDE, +}; + +static int mlxsw_sp_event_register(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_event_trap_id trap_id) +{ + struct mlxsw_event_listener *el; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + switch (trap_id) { + case MLXSW_TRAP_ID_PUDE: + el = &mlxsw_sp_pude_event; + break; + } + err = mlxsw_core_event_listener_register(mlxsw_sp->core, el, mlxsw_sp); + if (err) + return err; + + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + goto err_event_trap_set; + + return 0; + +err_event_trap_set: + mlxsw_core_event_listener_unregister(mlxsw_sp->core, el, mlxsw_sp); + return err; +} + +static void mlxsw_sp_event_unregister(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_event_trap_id trap_id) +{ + struct mlxsw_event_listener *el; + + switch (trap_id) { + case MLXSW_TRAP_ID_PUDE: + el = &mlxsw_sp_pude_event; + break; + } + mlxsw_core_event_listener_unregister(mlxsw_sp->core, el, mlxsw_sp); +} + +static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + + if (unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + return; + } + + skb->dev = mlxsw_sp_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + netif_receive_skb(skb); +} + +static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_FDB_MC, + }, + /* Traps for specific L2 packet types, not trapped as FDB MC */ + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_STP, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LACP, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_EAPOL, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LLDP, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MMRP, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MVRP, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_RPVST, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_DHCP, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_QUERY, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, + }, +}; + +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int i; + int err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_RX); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_CTRL); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) { + err = mlxsw_core_rx_listener_register(mlxsw_sp->core, + &mlxsw_sp_rx_listener[i], + mlxsw_sp); + if (err) + goto err_rx_listener_register; + + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + mlxsw_sp_rx_listener[i].trap_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + goto err_rx_trap_set; + } + return 0; + +err_rx_trap_set: + mlxsw_core_rx_listener_unregister(mlxsw_sp->core, + &mlxsw_sp_rx_listener[i], + mlxsw_sp); +err_rx_listener_register: + for (i--; i >= 0; i--) { + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_sp_rx_listener[i].trap_id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); + + mlxsw_core_rx_listener_unregister(mlxsw_sp->core, + &mlxsw_sp_rx_listener[i], + mlxsw_sp); + } + return err; +} + +static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) { + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_sp_rx_listener[i].trap_id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); + + mlxsw_core_rx_listener_unregister(mlxsw_sp->core, + &mlxsw_sp_rx_listener[i], + mlxsw_sp); + } +} + +static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core, + enum mlxsw_reg_sfgc_type type, + enum mlxsw_reg_sfgc_bridge_type bridge_type) +{ + enum mlxsw_flood_table_type table_type; + enum mlxsw_sp_flood_table flood_table; + char sfgc_pl[MLXSW_REG_SFGC_LEN]; + + if (bridge_type == MLXSW_REG_SFGC_BRIDGE_TYPE_VFID) { + table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; + flood_table = 0; + } else { + table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; + if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST) + flood_table = MLXSW_SP_FLOOD_TABLE_UC; + else + flood_table = MLXSW_SP_FLOOD_TABLE_BM; + } + + mlxsw_reg_sfgc_pack(sfgc_pl, type, bridge_type, table_type, + flood_table); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(sfgc), sfgc_pl); +} + +static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) +{ + int type, err; + + /* For non-offloaded netdevs, flood all traffic types to CPU + * port. + */ + for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { + if (type == MLXSW_REG_SFGC_TYPE_RESERVED) + continue; + + err = __mlxsw_sp_flood_init(mlxsw_sp->core, type, + MLXSW_REG_SFGC_BRIDGE_TYPE_VFID); + if (err) + return err; + } + + /* For bridged ports, use one flooding table for unknown unicast + * traffic and a second table for unregistered multicast and + * broadcast. + */ + for (type = 0; type < MLXSW_REG_SFGC_TYPE_MAX; type++) { + if (type == MLXSW_REG_SFGC_TYPE_RESERVED) + continue; + + err = __mlxsw_sp_flood_init(mlxsw_sp->core, type, + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info) +{ + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + mlxsw_sp->core = mlxsw_core; + mlxsw_sp->bus_info = mlxsw_bus_info; + + err = mlxsw_sp_base_mac_get(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to get base mac\n"); + return err; + } + + err = mlxsw_sp_ports_create(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); + goto err_ports_create; + } + + err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n"); + goto err_event_register; + } + + err = mlxsw_sp_traps_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps for RX\n"); + goto err_rx_listener_register; + } + + err = mlxsw_sp_flood_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize flood tables\n"); + goto err_flood_init; + } + + err = mlxsw_sp_buffers_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize buffers\n"); + goto err_buffers_init; + } + + err = mlxsw_sp_switchdev_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); + goto err_switchdev_init; + } + + return 0; + +err_switchdev_init: +err_buffers_init: +err_flood_init: + mlxsw_sp_traps_fini(mlxsw_sp); +err_rx_listener_register: + mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); +err_event_register: + mlxsw_sp_ports_remove(mlxsw_sp); +err_ports_create: + mlxsw_sp_vfids_fini(mlxsw_sp); + return err; +} + +static void mlxsw_sp_fini(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_traps_fini(mlxsw_sp); + mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); + mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_vfids_fini(mlxsw_sp); +} + +static struct mlxsw_config_profile mlxsw_sp_config_profile = { + .used_max_vepa_channels = 1, + .max_vepa_channels = 0, + .used_max_lag = 1, + .max_lag = 64, + .used_max_port_per_lag = 1, + .max_port_per_lag = 16, + .used_max_mid = 1, + .max_mid = 7000, + .used_max_pgt = 1, + .max_pgt = 0, + .used_max_system_port = 1, + .max_system_port = 64, + .used_max_vlan_groups = 1, + .max_vlan_groups = 127, + .used_max_regions = 1, + .max_regions = 400, + .used_flood_tables = 1, + .used_flood_mode = 1, + .flood_mode = 3, + .max_fid_offset_flood_tables = 2, + .fid_offset_flood_table_size = VLAN_N_VID - 1, + .max_fid_flood_tables = 1, + .fid_flood_table_size = VLAN_N_VID, + .used_max_ib_mc = 1, + .max_ib_mc = 0, + .used_max_pkey = 1, + .max_pkey = 0, + .swid_config = { + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_ETH, + } + }, +}; + +static struct mlxsw_driver mlxsw_sp_driver = { + .kind = MLXSW_DEVICE_KIND_SPECTRUM, + .owner = THIS_MODULE, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp_init, + .fini = mlxsw_sp_fini, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp_config_profile, +}; + +static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; +} + +static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + /* When port is not bridged untagged packets are tagged with + * PVID=VID=1, thereby creating an implicit VLAN interface in + * the device. Remove it and let bridge code take care of its + * own VLANs. + */ + err = mlxsw_sp_port_kill_vid(dev, 0, 1); + if (err) + netdev_err(dev, "Failed to remove VID 1\n"); + + return err; +} + +static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + /* Add implicit VLAN interface in the device, so that untagged + * packets will be classified to the default vFID. + */ + err = mlxsw_sp_port_add_vid(dev, 0, 1); + if (err) + netdev_err(dev, "Failed to add VID 1\n"); + + return err; +} + +static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) +{ + return !mlxsw_sp->master_bridge.dev || + mlxsw_sp->master_bridge.dev == br_dev; +} + +static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) +{ + mlxsw_sp->master_bridge.dev = br_dev; + mlxsw_sp->master_bridge.ref_count++; +} + +static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) +{ + if (--mlxsw_sp->master_bridge.ref_count == 0) + mlxsw_sp->master_bridge.dev = NULL; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *upper_dev; + struct mlxsw_sp *mlxsw_sp; + int err; + + if (!mlxsw_sp_port_dev_check(dev)) + return NOTIFY_DONE; + + mlxsw_sp_port = netdev_priv(dev); + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + info = ptr; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + /* HW limitation forbids to put ports to multiple bridges. */ + if (info->master && info->linking && + netif_is_bridge_master(upper_dev) && + !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) + return NOTIFY_BAD; + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (info->master && + netif_is_bridge_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); + if (err) + netdev_err(dev, "Failed to join bridge\n"); + mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev); + mlxsw_sp_port->bridged = 1; + } else { + err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port); + if (err) + netdev_err(dev, "Failed to leave bridge\n"); + mlxsw_sp_port->bridged = 0; + mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); + } + } + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { + .notifier_call = mlxsw_sp_netdevice_event, +}; + +static int __init mlxsw_sp_module_init(void) +{ + int err; + + register_netdevice_notifier(&mlxsw_sp_netdevice_nb); + err = mlxsw_core_driver_register(&mlxsw_sp_driver); + if (err) + goto err_core_driver_register; + return 0; + +err_core_driver_register: + unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); + return err; +} + +static void __exit mlxsw_sp_module_exit(void) +{ + mlxsw_core_driver_unregister(&mlxsw_sp_driver); + unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); +} + +module_init(mlxsw_sp_module_init); +module_exit(mlxsw_sp_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox Spectrum driver"); +MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SPECTRUM); diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum.h new file mode 100644 index 000000000..4365c8bcc --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -0,0 +1,122 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_H +#define _MLXSW_SPECTRUM_H + +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/bitops.h> +#include <linux/if_vlan.h> +#include <net/switchdev.h> + +#include "core.h" + +#define MLXSW_SP_VFID_BASE VLAN_N_VID + +struct mlxsw_sp_port; + +struct mlxsw_sp { + unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; + struct mlxsw_sp_port **ports; + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + unsigned char base_mac[ETH_ALEN]; + struct { + struct delayed_work dw; +#define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100 + unsigned int interval; /* ms */ + } fdb_notify; +#define MLXSW_SP_DEFAULT_AGEING_TIME 300 + u32 ageing_time; + struct { + struct net_device *dev; + unsigned int ref_count; + } master_bridge; +}; + +struct mlxsw_sp_port_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 tx_dropped; +}; + +struct mlxsw_sp_port { + struct net_device *dev; + struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; + struct mlxsw_sp *mlxsw_sp; + u8 local_port; + u8 stp_state; + u8 learning:1, + learning_sync:1, + uc_flood:1, + bridged:1; + u16 pvid; + /* 802.1Q bridge VLANs */ + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + /* VLAN interfaces */ + unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; + u16 nr_vfids; +}; + +enum mlxsw_sp_flood_table { + MLXSW_SP_FLOOD_TABLE_UC, + MLXSW_SP_FLOOD_TABLE_BM, +}; + +int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); + +int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_svfa_mt mt, bool valid, u16 fid, + u16 vid); +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged); +int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, + u16 vid); +int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid); + +#endif diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c new file mode 100644 index 000000000..d59195e3f --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -0,0 +1,422 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include "spectrum.h" +#include "core.h" +#include "port.h" +#include "reg.h" + +struct mlxsw_sp_pb { + u8 index; + u16 size; +}; + +#define MLXSW_SP_PB(_index, _size) \ + { \ + .index = _index, \ + .size = _size, \ + } + +static const struct mlxsw_sp_pb mlxsw_sp_pbs[] = { + MLXSW_SP_PB(0, 208), + MLXSW_SP_PB(1, 208), + MLXSW_SP_PB(2, 208), + MLXSW_SP_PB(3, 208), + MLXSW_SP_PB(4, 208), + MLXSW_SP_PB(5, 208), + MLXSW_SP_PB(6, 208), + MLXSW_SP_PB(7, 208), + MLXSW_SP_PB(9, 208), +}; + +#define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) + +static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int i; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, + 0xffff, 0xffff / 2); + for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { + const struct mlxsw_sp_pb *pb; + + pb = &mlxsw_sp_pbs[i]; + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pb->index, pb->size); + } + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, + MLXSW_REG(pbmc), pbmc_pl); +} + +#define MLXSW_SP_SB_BYTES_PER_CELL 96 + +struct mlxsw_sp_sb_pool { + u8 pool; + enum mlxsw_reg_sbpr_dir dir; + enum mlxsw_reg_sbpr_mode mode; + u32 size; +}; + +#define MLXSW_SP_SB_POOL_INGRESS_SIZE \ + ((15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) / \ + MLXSW_SP_SB_BYTES_PER_CELL) +#define MLXSW_SP_SB_POOL_EGRESS_SIZE \ + ((14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) / \ + MLXSW_SP_SB_BYTES_PER_CELL) + +#define MLXSW_SP_SB_POOL(_pool, _dir, _mode, _size) \ + { \ + .pool = _pool, \ + .dir = _dir, \ + .mode = _mode, \ + .size = _size, \ + } + +#define MLXSW_SP_SB_POOL_INGRESS(_pool, _size) \ + MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBPR_DIR_INGRESS, \ + MLXSW_REG_SBPR_MODE_DYNAMIC, _size) + +#define MLXSW_SP_SB_POOL_EGRESS(_pool, _size) \ + MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBPR_DIR_EGRESS, \ + MLXSW_REG_SBPR_MODE_DYNAMIC, _size) + +static const struct mlxsw_sp_sb_pool mlxsw_sp_sb_pools[] = { + MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_SB_POOL_INGRESS_SIZE), + MLXSW_SP_SB_POOL_INGRESS(1, 0), + MLXSW_SP_SB_POOL_INGRESS(2, 0), + MLXSW_SP_SB_POOL_INGRESS(3, 0), + MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_SB_POOL_EGRESS_SIZE), + MLXSW_SP_SB_POOL_EGRESS(1, 0), + MLXSW_SP_SB_POOL_EGRESS(2, 0), + MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_SB_POOL_EGRESS_SIZE), +}; + +#define MLXSW_SP_SB_POOLS_LEN ARRAY_SIZE(mlxsw_sp_sb_pools) + +static int mlxsw_sp_sb_pools_init(struct mlxsw_sp *mlxsw_sp) +{ + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + int i; + int err; + + for (i = 0; i < MLXSW_SP_SB_POOLS_LEN; i++) { + const struct mlxsw_sp_sb_pool *pool; + + pool = &mlxsw_sp_sb_pools[i]; + mlxsw_reg_sbpr_pack(sbpr_pl, pool->pool, pool->dir, + pool->mode, pool->size); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + if (err) + return err; + } + return 0; +} + +struct mlxsw_sp_sb_cm { + union { + u8 pg; + u8 tc; + } u; + enum mlxsw_reg_sbcm_dir dir; + u32 min_buff; + u32 max_buff; + u8 pool; +}; + +#define MLXSW_SP_SB_CM(_pg_tc, _dir, _min_buff, _max_buff, _pool) \ + { \ + .u.pg = _pg_tc, \ + .dir = _dir, \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ + } + +#define MLXSW_SP_SB_CM_INGRESS(_pg, _min_buff, _max_buff) \ + MLXSW_SP_SB_CM(_pg, MLXSW_REG_SBCM_DIR_INGRESS, \ + _min_buff, _max_buff, 0) + +#define MLXSW_SP_SB_CM_EGRESS(_tc, _min_buff, _max_buff) \ + MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, \ + _min_buff, _max_buff, 0) + +#define MLXSW_SP_CPU_PORT_SB_CM_EGRESS(_tc) \ + MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, 104, 2, 3) + +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = { + MLXSW_SP_SB_CM_INGRESS(0, 10000 / MLXSW_SP_SB_BYTES_PER_CELL, 8), + MLXSW_SP_SB_CM_INGRESS(1, 0, 0), + MLXSW_SP_SB_CM_INGRESS(2, 0, 0), + MLXSW_SP_SB_CM_INGRESS(3, 0, 0), + MLXSW_SP_SB_CM_INGRESS(4, 0, 0), + MLXSW_SP_SB_CM_INGRESS(5, 0, 0), + MLXSW_SP_SB_CM_INGRESS(6, 0, 0), + MLXSW_SP_SB_CM_INGRESS(7, 0, 0), + MLXSW_SP_SB_CM_INGRESS(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff), + MLXSW_SP_SB_CM_EGRESS(0, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(1, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(2, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(3, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(4, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(5, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(6, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(7, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_EGRESS(8, 0, 0), + MLXSW_SP_SB_CM_EGRESS(9, 0, 0), + MLXSW_SP_SB_CM_EGRESS(10, 0, 0), + MLXSW_SP_SB_CM_EGRESS(11, 0, 0), + MLXSW_SP_SB_CM_EGRESS(12, 0, 0), + MLXSW_SP_SB_CM_EGRESS(13, 0, 0), + MLXSW_SP_SB_CM_EGRESS(14, 0, 0), + MLXSW_SP_SB_CM_EGRESS(15, 0, 0), + MLXSW_SP_SB_CM_EGRESS(16, 1, 0xff), +}; + +#define MLXSW_SP_SB_CMS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms) + +static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(0), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(1), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(2), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(3), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(4), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(5), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(6), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(7), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(8), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(9), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(10), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(11), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(12), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(13), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(14), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(15), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(16), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(17), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(18), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(19), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(20), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(21), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(22), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(23), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(24), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(25), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(26), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(27), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(28), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(29), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(30), + MLXSW_SP_CPU_PORT_SB_CM_EGRESS(31), +}; + +#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ + ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) + +static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const struct mlxsw_sp_sb_cm *cms, + size_t cms_len) +{ + char sbcm_pl[MLXSW_REG_SBCM_LEN]; + int i; + int err; + + for (i = 0; i < cms_len; i++) { + const struct mlxsw_sp_sb_cm *cm; + + cm = &cms[i]; + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, cm->u.pg, cm->dir, + cm->min_buff, cm->max_buff, cm->pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, mlxsw_sp_sb_cms, + MLXSW_SP_SB_CMS_LEN); +} + +static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp_sb_cms_init(mlxsw_sp, 0, mlxsw_sp_cpu_port_sb_cms, + MLXSW_SP_CPU_PORT_SB_MCS_LEN); +} + +struct mlxsw_sp_sb_pm { + u8 pool; + enum mlxsw_reg_sbpm_dir dir; + u32 min_buff; + u32 max_buff; +}; + +#define MLXSW_SP_SB_PM(_pool, _dir, _min_buff, _max_buff) \ + { \ + .pool = _pool, \ + .dir = _dir, \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + } + +#define MLXSW_SP_SB_PM_INGRESS(_pool, _min_buff, _max_buff) \ + MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBPM_DIR_INGRESS, \ + _min_buff, _max_buff) + +#define MLXSW_SP_SB_PM_EGRESS(_pool, _min_buff, _max_buff) \ + MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBPM_DIR_EGRESS, \ + _min_buff, _max_buff) + +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { + MLXSW_SP_SB_PM_INGRESS(0, 0, 0xff), + MLXSW_SP_SB_PM_INGRESS(1, 0, 0), + MLXSW_SP_SB_PM_INGRESS(2, 0, 0), + MLXSW_SP_SB_PM_INGRESS(3, 0, 0), + MLXSW_SP_SB_PM_EGRESS(0, 0, 7), + MLXSW_SP_SB_PM_EGRESS(1, 0, 0), + MLXSW_SP_SB_PM_EGRESS(2, 0, 0), + MLXSW_SP_SB_PM_EGRESS(3, 0, 0), +}; + +#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) + +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + int i; + int err; + + for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { + const struct mlxsw_sp_sb_pm *pm; + + pm = &mlxsw_sp_sb_pms[i]; + mlxsw_reg_sbpm_pack(sbpm_pl, mlxsw_sp_port->local_port, + pm->pool, pm->dir, + pm->min_buff, pm->max_buff); + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, + MLXSW_REG(sbpm), sbpm_pl); + if (err) + return err; + } + return 0; +} + +struct mlxsw_sp_sb_mm { + u8 prio; + u32 min_buff; + u32 max_buff; + u8 pool; +}; + +#define MLXSW_SP_SB_MM(_prio, _min_buff, _max_buff, _pool) \ + { \ + .prio = _prio, \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ + } + +static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { + MLXSW_SP_SB_MM(0, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(1, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(2, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(3, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(4, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(5, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(6, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(7, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(8, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(10, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(11, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(12, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(13, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(14, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), +}; + +#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) + +static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) +{ + char sbmm_pl[MLXSW_REG_SBMM_LEN]; + int i; + int err; + + for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { + const struct mlxsw_sp_sb_mm *mc; + + mc = &mlxsw_sp_sb_mms[i]; + mlxsw_reg_sbmm_pack(sbmm_pl, mc->prio, mc->min_buff, + mc->max_buff, mc->pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); + if (err) + return err; + } + return 0; +} + +int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_sb_pools_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_sb_mms_init(mlxsw_sp); + + return err; +} + +int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + if (err) + return err; + err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); + if (err) + return err; + err = mlxsw_sp_port_sb_pms_init(mlxsw_sp_port); + + return err; +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c new file mode 100644 index 000000000..7dbeafa65 --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -0,0 +1,906 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/if_bridge.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> +#include <linux/rtnetlink.h> +#include <net/switchdev.h> + +#include "spectrum.h" +#include "core.h" +#include "reg.h" + +static int mlxsw_sp_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, + attr->u.ppid.id_len); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + attr->u.brport_flags = + (mlxsw_sp_port->learning ? BR_LEARNING : 0) | + (mlxsw_sp_port->learning_sync ? BR_LEARNING_SYNC : 0) | + (mlxsw_sp_port->uc_flood ? BR_FLOOD : 0); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 state) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_reg_spms_state spms_state; + char *spms_pl; + u16 vid; + int err; + + switch (state) { + case BR_STATE_DISABLED: /* fall-through */ + case BR_STATE_FORWARDING: + spms_state = MLXSW_REG_SPMS_STATE_FORWARDING; + break; + case BR_STATE_LISTENING: /* fall-through */ + case BR_STATE_LEARNING: + spms_state = MLXSW_REG_SPMS_STATE_LEARNING; + break; + case BR_STATE_BLOCKING: + spms_state = MLXSW_REG_SPMS_STATE_DISCARDING; + break; + default: + BUG(); + } + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + u8 state) +{ + if (switchdev_trans_ph_prepare(trans)) + return 0; + + mlxsw_sp_port->stp_state = state; + return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); +} + +static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_begin, u16 fid_end, bool set, + bool only_uc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u16 range = fid_end - fid_begin + 1; + char *sftr_pl; + int err; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, fid_begin, + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range, + mlxsw_sp_port->local_port, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + if (err) + goto buffer_out; + + /* Flooding control allows one to decide whether a given port will + * flood unicast traffic for which there is no FDB entry. + */ + if (only_uc) + goto buffer_out; + + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, fid_begin, + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST, range, + mlxsw_sp_port->local_port, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + +buffer_out: + kfree(sftr_pl); + return err; +} + +static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool set) +{ + struct net_device *dev = mlxsw_sp_port->dev; + u16 vid, last_visited_vid; + int err; + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { + err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, set, + true); + if (err) { + last_visited_vid = vid; + goto err_port_flood_set; + } + } + + return 0; + +err_port_flood_set: + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, last_visited_vid) + __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, !set, true); + netdev_err(dev, "Failed to configure unicast flooding\n"); + return err; +} + +static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + unsigned long brport_flags) +{ + unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0; + bool set; + int err; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + if ((uc_flood ^ brport_flags) & BR_FLOOD) { + set = mlxsw_sp_port->uc_flood ? false : true; + err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, set); + if (err) + return err; + } + + mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0; + mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0; + mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 1 : 0; + + return 0; +} + +static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) +{ + char sfdat_pl[MLXSW_REG_SFDAT_LEN]; + int err; + + mlxsw_reg_sfdat_pack(sfdat_pl, ageing_time); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdat), sfdat_pl); + if (err) + return err; + mlxsw_sp->ageing_time = ageing_time; + return 0; +} + +static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + unsigned long ageing_clock_t) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); + u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time); +} + +static int mlxsw_sp_port_attr_set(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, + attr->u.stp_state); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, + attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans, + attr->u.ageing_time); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spvid_pl[MLXSW_REG_SPVID_LEN]; + + mlxsw_reg_spvid_pack(spvid_pl, mlxsw_sp_port->local_port, vid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); +} + +static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + int err; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, fid); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); + + if (err) + return err; + + set_bit(fid, mlxsw_sp->active_fids); + return 0; +} + +static void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, u16 fid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + clear_bit(fid, mlxsw_sp->active_fids); + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, + fid, fid); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) +{ + enum mlxsw_reg_svfa_mt mt; + + if (mlxsw_sp_port->nr_vfids) + mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + else + mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, true, fid, fid); +} + +static int mlxsw_sp_port_fid_unmap(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) +{ + enum mlxsw_reg_svfa_mt mt; + + if (!mlxsw_sp_port->nr_vfids) + return 0; + + mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, fid, fid); +} + +static int mlxsw_sp_port_add_vids(struct net_device *dev, u16 vid_begin, + u16 vid_end) +{ + u16 vid; + int err; + + for (vid = vid_begin; vid <= vid_end; vid++) { + err = mlxsw_sp_port_add_vid(dev, 0, vid); + if (err) + goto err_port_add_vid; + } + return 0; + +err_port_add_vid: + for (vid--; vid >= vid_begin; vid--) + mlxsw_sp_port_kill_vid(dev, 0, vid); + return err; +} + +static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool flag_untagged, bool flag_pvid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *dev = mlxsw_sp_port->dev; + enum mlxsw_reg_svfa_mt mt; + u16 vid, vid_e; + int err; + + /* In case this is invoked with BRIDGE_FLAGS_SELF and port is + * not bridged, then packets ingressing through the port with + * the specified VIDs will be directed to CPU. + */ + if (!mlxsw_sp_port->bridged) + return mlxsw_sp_port_add_vids(dev, vid_begin, vid_end); + + for (vid = vid_begin; vid <= vid_end; vid++) { + if (!test_bit(vid, mlxsw_sp->active_fids)) { + err = mlxsw_sp_fid_create(mlxsw_sp, vid); + if (err) { + netdev_err(dev, "Failed to create FID=%d\n", + vid); + return err; + } + + /* When creating a FID, we set a VID to FID mapping + * regardless of the port's mode. + */ + mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, + true, vid, vid); + if (err) { + netdev_err(dev, "Failed to create FID=VID=%d mapping\n", + vid); + return err; + } + } + + /* Set FID mapping according to port's mode */ + err = mlxsw_sp_port_fid_map(mlxsw_sp_port, vid); + if (err) { + netdev_err(dev, "Failed to map FID=%d", vid); + return err; + } + } + + err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, + true, false); + if (err) { + netdev_err(dev, "Failed to configure flooding\n"); + return err; + } + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, true, + flag_untagged); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Unable to add VIDs %d-%d\n", + vid, vid_e); + return err; + } + } + + vid = vid_begin; + if (flag_pvid && mlxsw_sp_port->pvid != vid) { + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Unable to add PVID %d\n", + vid); + return err; + } + mlxsw_sp_port->pvid = vid; + } + + /* Changing activity bits only if HW operation succeded */ + for (vid = vid_begin; vid <= vid_end; vid++) + set_bit(vid, mlxsw_sp_port->active_vlans); + + return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, + mlxsw_sp_port->stp_state); +} + +static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) +{ + bool untagged_flag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool pvid_flag = vlan->flags & BRIDGE_VLAN_INFO_PVID; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + return __mlxsw_sp_port_vlans_add(mlxsw_sp_port, + vlan->vid_begin, vlan->vid_end, + untagged_flag, pvid_flag); +} + +static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + enum mlxsw_reg_sfd_rec_policy policy; + enum mlxsw_reg_sfd_op op; + char *sfd_pl; + int err; + + if (!vid) + vid = mlxsw_sp_port->pvid; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; + op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; + mlxsw_reg_sfd_pack(sfd_pl, op, 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, + mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, + mlxsw_sp_port->local_port); + err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd), + sfd_pl); + kfree(sfd_pl); + + return err; +} + +static int +mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + if (switchdev_trans_ph_prepare(trans)) + return 0; + + return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, + true, false); +} + +static int mlxsw_sp_port_obj_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_VLAN(obj), + trans); + break; + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_FDB(obj), + trans); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int mlxsw_sp_port_kill_vids(struct net_device *dev, u16 vid_begin, + u16 vid_end) +{ + u16 vid; + int err; + + for (vid = vid_begin; vid <= vid_end; vid++) { + err = mlxsw_sp_port_kill_vid(dev, 0, vid); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, bool init) +{ + struct net_device *dev = mlxsw_sp_port->dev; + u16 vid, vid_e; + int err; + + /* In case this is invoked with BRIDGE_FLAGS_SELF and port is + * not bridged, then prevent packets ingressing through the + * port with the specified VIDs from being trapped to CPU. + */ + if (!init && !mlxsw_sp_port->bridged) + return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, false, + false); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Unable to del VIDs %d-%d\n", + vid, vid_e); + return err; + } + } + + if ((mlxsw_sp_port->pvid >= vid_begin) && + (mlxsw_sp_port->pvid <= vid_end)) { + /* Default VLAN is always 1 */ + mlxsw_sp_port->pvid = 1; + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, + mlxsw_sp_port->pvid); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Unable to del PVID %d\n", + vid); + return err; + } + } + + if (init) + goto out; + + err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); + if (err) { + netdev_err(dev, "Failed to clear flooding\n"); + return err; + } + + for (vid = vid_begin; vid <= vid_end; vid++) { + /* Remove FID mapping in case of Virtual mode */ + err = mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); + if (err) { + netdev_err(dev, "Failed to unmap FID=%d", vid); + return err; + } + } + +out: + /* Changing activity bits only if HW operation succeded */ + for (vid = vid_begin; vid <= vid_end; vid++) + clear_bit(vid, mlxsw_sp_port->active_vlans); + + return 0; +} + +static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_vlan *vlan) +{ + return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, + vlan->vid_begin, vlan->vid_end, false); +} + +static int +mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_port_fdb *fdb) +{ + return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, + false, false); +} + +static int mlxsw_sp_port_obj_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_VLAN(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_FDB(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb) +{ + char *sfd_pl; + char mac[ETH_ALEN]; + u16 vid; + u8 local_port; + u8 num_rec; + int stored_err = 0; + int i; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); + do { + mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); + err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, + MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + + /* Even in case of error, we have to run the dump to the end + * so the session in firmware is finished. + */ + if (stored_err) + continue; + + for (i = 0; i < num_rec; i++) { + switch (mlxsw_reg_sfd_rec_type_get(sfd_pl, i)) { + case MLXSW_REG_SFD_REC_TYPE_UNICAST: + mlxsw_reg_sfd_uc_unpack(sfd_pl, i, mac, &vid, + &local_port); + if (local_port == mlxsw_sp_port->local_port) { + ether_addr_copy(fdb->addr, mac); + fdb->ndm_state = NUD_REACHABLE; + fdb->vid = vid; + err = cb(&fdb->obj); + if (err) + stored_err = err; + } + } + } + } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); + +out: + kfree(sfd_pl); + return stored_err ? stored_err : err; +} + +static int mlxsw_sp_port_vlan_dump(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb) +{ + u16 vid; + int err = 0; + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { + vlan->flags = 0; + if (vid == mlxsw_sp_port->pvid) + vlan->flags |= BRIDGE_VLAN_INFO_PVID; + vlan->vid_begin = vid; + vlan->vid_end = vid; + err = cb(&vlan->obj); + if (err) + break; + } + return err; +} + +static int mlxsw_sp_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj, + switchdev_obj_dump_cb_t *cb) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err = 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = mlxsw_sp_port_vlan_dump(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_VLAN(obj), cb); + break; + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = mlxsw_sp_port_fdb_dump(mlxsw_sp_port, + SWITCHDEV_OBJ_PORT_FDB(obj), cb); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { + .switchdev_port_attr_get = mlxsw_sp_port_attr_get, + .switchdev_port_attr_set = mlxsw_sp_port_attr_set, + .switchdev_port_obj_add = mlxsw_sp_port_obj_add, + .switchdev_port_obj_del = mlxsw_sp_port_obj_del, + .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, +}; + +static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char mac[ETH_ALEN]; + u8 local_port; + u16 vid; + int err; + + mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &vid, &local_port); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n"); + return; + } + + err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid, + adding && mlxsw_sp_port->learning, true); + if (err) { + if (net_ratelimit()) + netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + return; + } + + if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) { + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; + call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev, + &info.info); + } +} + +static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index) +{ + switch (mlxsw_reg_sfn_rec_type_get(sfn_pl, rec_index)) { + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC: + mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC: + mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; + } +} + +static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + schedule_delayed_work(&mlxsw_sp->fdb_notify.dw, + msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); +} + +static void mlxsw_sp_fdb_notify_work(struct work_struct *work) +{ + struct mlxsw_sp *mlxsw_sp; + char *sfn_pl; + u8 num_rec; + int i; + int err; + + sfn_pl = kmalloc(MLXSW_REG_SFN_LEN, GFP_KERNEL); + if (!sfn_pl) + return; + + mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); + + rtnl_lock(); + do { + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); + break; + } + num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + + } while (num_rec); + rtnl_unlock(); + + kfree(sfn_pl); + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); +} + +static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_ageing_set(mlxsw_sp, MLXSW_SP_DEFAULT_AGEING_TIME); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n"); + return err; + } + INIT_DELAYED_WORK(&mlxsw_sp->fdb_notify.dw, mlxsw_sp_fdb_notify_work); + mlxsw_sp->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL; + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); + return 0; +} + +static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) +{ + cancel_delayed_work_sync(&mlxsw_sp->fdb_notify.dw); +} + +static void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) +{ + u16 fid; + + for_each_set_bit(fid, mlxsw_sp->active_fids, VLAN_N_VID) + mlxsw_sp_fid_destroy(mlxsw_sp, fid); +} + +int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp_fdb_init(mlxsw_sp); +} + +void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_fdb_fini(mlxsw_sp); + mlxsw_sp_fids_fini(mlxsw_sp); +} + +int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + /* Allow only untagged packets to ingress and tag them internally + * with VID 1. + */ + mlxsw_sp_port->pvid = 1; + err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID, true); + if (err) { + netdev_err(dev, "Unable to init VLANs\n"); + return err; + } + + /* Add implicit VLAN interface in the device, so that untagged + * packets will be classified to the default vFID. + */ + err = mlxsw_sp_port_add_vid(dev, 0, 1); + if (err) + netdev_err(dev, "Failed to configure default vFID\n"); + + return err; +} + +void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops; +} + +void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ +} diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/switchx2.c new file mode 100644 index 000000000..d85960cfb --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -0,0 +1,1557 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/switchx2.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <net/switchdev.h> +#include <generated/utsrelease.h> + +#include "core.h" +#include "reg.h" +#include "port.h" +#include "trap.h" +#include "txheader.h" + +static const char mlxsw_sx_driver_name[] = "mlxsw_switchx2"; +static const char mlxsw_sx_driver_version[] = "1.0"; + +struct mlxsw_sx_port; + +struct mlxsw_sx { + struct mlxsw_sx_port **ports; + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + u8 hw_id[ETH_ALEN]; +}; + +struct mlxsw_sx_port_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 tx_dropped; +}; + +struct mlxsw_sx_port { + struct net_device *dev; + struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats; + struct mlxsw_sx *mlxsw_sx; + u8 local_port; +}; + +/* tx_hdr_version + * Tx header version. + * Must be set to 0. + */ +MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4); + +/* tx_hdr_ctl + * Packet control type. + * 0 - Ethernet control (e.g. EMADs, LACP) + * 1 - Ethernet data + */ +MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2); + +/* tx_hdr_proto + * Packet protocol type. Must be set to 1 (Ethernet). + */ +MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3); + +/* tx_hdr_etclass + * Egress TClass to be used on the egress device on the egress port. + * The MSB is specified in the 'ctclass3' field. + * Range is 0-15, where 15 is the highest priority. + */ +MLXSW_ITEM32(tx, hdr, etclass, 0x00, 18, 3); + +/* tx_hdr_swid + * Switch partition ID. + */ +MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3); + +/* tx_hdr_port_mid + * Destination local port for unicast packets. + * Destination multicast ID for multicast packets. + * + * Control packets are directed to a specific egress port, while data + * packets are transmitted through the CPU port (0) into the switch partition, + * where forwarding rules are applied. + */ +MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16); + +/* tx_hdr_ctclass3 + * See field 'etclass'. + */ +MLXSW_ITEM32(tx, hdr, ctclass3, 0x04, 14, 1); + +/* tx_hdr_rdq + * RDQ for control packets sent to remote CPU. + * Must be set to 0x1F for EMADs, otherwise 0. + */ +MLXSW_ITEM32(tx, hdr, rdq, 0x04, 9, 5); + +/* tx_hdr_cpu_sig + * Signature control for packets going to CPU. Must be set to 0. + */ +MLXSW_ITEM32(tx, hdr, cpu_sig, 0x04, 0, 9); + +/* tx_hdr_sig + * Stacking protocl signature. Must be set to 0xE0E0. + */ +MLXSW_ITEM32(tx, hdr, sig, 0x0C, 16, 16); + +/* tx_hdr_stclass + * Stacking TClass. + */ +MLXSW_ITEM32(tx, hdr, stclass, 0x0C, 13, 3); + +/* tx_hdr_emad + * EMAD bit. Must be set for EMADs. + */ +MLXSW_ITEM32(tx, hdr, emad, 0x0C, 5, 1); + +/* tx_hdr_type + * 0 - Data packets + * 6 - Control packets + */ +MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); + +static void mlxsw_sx_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_tx_info *tx_info) +{ + char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + bool is_emad = tx_info->is_emad; + + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + /* We currently set default values for the egress tclass (QoS). */ + mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_0); + mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL); + mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_hdr_etclass_set(txhdr, is_emad ? MLXSW_TXHDR_ETCLASS_6 : + MLXSW_TXHDR_ETCLASS_5); + mlxsw_tx_hdr_swid_set(txhdr, 0); + mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port); + mlxsw_tx_hdr_ctclass3_set(txhdr, MLXSW_TXHDR_CTCLASS3); + mlxsw_tx_hdr_rdq_set(txhdr, is_emad ? MLXSW_TXHDR_RDQ_EMAD : + MLXSW_TXHDR_RDQ_OTHER); + mlxsw_tx_hdr_cpu_sig_set(txhdr, MLXSW_TXHDR_CPU_SIG); + mlxsw_tx_hdr_sig_set(txhdr, MLXSW_TXHDR_SIG); + mlxsw_tx_hdr_stclass_set(txhdr, MLXSW_TXHDR_STCLASS_NONE); + mlxsw_tx_hdr_emad_set(txhdr, is_emad ? MLXSW_TXHDR_EMAD : + MLXSW_TXHDR_NOT_EMAD); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); +} + +static int mlxsw_sx_port_admin_status_set(struct mlxsw_sx_port *mlxsw_sx_port, + bool is_up) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char paos_pl[MLXSW_REG_PAOS_LEN]; + + mlxsw_reg_paos_pack(paos_pl, mlxsw_sx_port->local_port, + is_up ? MLXSW_PORT_ADMIN_STATUS_UP : + MLXSW_PORT_ADMIN_STATUS_DOWN); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(paos), paos_pl); +} + +static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port, + bool *p_is_up) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char paos_pl[MLXSW_REG_PAOS_LEN]; + u8 oper_status; + int err; + + mlxsw_reg_paos_pack(paos_pl, mlxsw_sx_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(paos), paos_pl); + if (err) + return err; + oper_status = mlxsw_reg_paos_oper_status_get(paos_pl); + *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false; + return 0; +} + +static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char pmtu_pl[MLXSW_REG_PMTU_LEN]; + int max_mtu; + int err; + + mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl); + if (err) + return err; + max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); + + if (mtu > max_mtu) + return -EINVAL; + + mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, mtu); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl); +} + +static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char pspa_pl[MLXSW_REG_PSPA_LEN]; + + mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sx_port->local_port); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pspa), pspa_pl); +} + +static int +mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char sspr_pl[MLXSW_REG_SSPR_LEN]; + + mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sx_port->local_port); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl); +} + +static int mlxsw_sx_port_module_check(struct mlxsw_sx_port *mlxsw_sx_port, + bool *p_usable) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sx_port->local_port); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + *p_usable = mlxsw_reg_pmlp_width_get(pmlp_pl) ? true : false; + return 0; +} + +static int mlxsw_sx_port_open(struct net_device *dev) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + int err; + + err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true); + if (err) + return err; + netif_start_queue(dev); + return 0; +} + +static int mlxsw_sx_port_stop(struct net_device *dev) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + + netif_stop_queue(dev); + return mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false); +} + +static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + struct mlxsw_sx_port_pcpu_stats *pcpu_stats; + const struct mlxsw_tx_info tx_info = { + .local_port = mlxsw_sx_port->local_port, + .is_emad = false, + }; + u64 len; + int err; + + if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info)) + return NETDEV_TX_BUSY; + + if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { + struct sk_buff *skb_orig = skb; + + skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); + if (!skb) { + this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb_orig); + return NETDEV_TX_OK; + } + } + mlxsw_sx_txhdr_construct(skb, &tx_info); + len = skb->len; + /* Due to a race we might fail here because of a full queue. In that + * unlikely case we simply drop the packet. + */ + err = mlxsw_core_skb_transmit(mlxsw_sx, skb, &tx_info); + + if (!err) { + pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + } + return NETDEV_TX_OK; +} + +static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + int err; + + err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu); + if (err) + return err; + dev->mtu = mtu; + return 0; +} + +static struct rtnl_link_stats64 * +mlxsw_sx_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx_port_pcpu_stats *p; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + u32 tx_dropped = 0; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + p = per_cpu_ptr(mlxsw_sx_port->pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rx_packets = p->rx_packets; + rx_bytes = p->rx_bytes; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + /* tx_dropped is u32, updated without syncp protection. */ + tx_dropped += p->tx_dropped; + } + stats->tx_dropped = tx_dropped; + return stats; +} + +static const struct net_device_ops mlxsw_sx_port_netdev_ops = { + .ndo_open = mlxsw_sx_port_open, + .ndo_stop = mlxsw_sx_port_stop, + .ndo_start_xmit = mlxsw_sx_port_xmit, + .ndo_change_mtu = mlxsw_sx_port_change_mtu, + .ndo_get_stats64 = mlxsw_sx_port_get_stats64, +}; + +static void mlxsw_sx_port_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + + strlcpy(drvinfo->driver, mlxsw_sx_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, mlxsw_sx_driver_version, + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_sx->bus_info->fw_rev.major, + mlxsw_sx->bus_info->fw_rev.minor, + mlxsw_sx->bus_info->fw_rev.subminor); + strlcpy(drvinfo->bus_info, mlxsw_sx->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + +struct mlxsw_sx_port_hw_stats { + char str[ETH_GSTRING_LEN]; + u64 (*getter)(char *payload); +}; + +static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = { + { + .str = "a_frames_transmitted_ok", + .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get, + }, + { + .str = "a_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_frames_received_ok_get, + }, + { + .str = "a_frame_check_sequence_errors", + .getter = mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get, + }, + { + .str = "a_alignment_errors", + .getter = mlxsw_reg_ppcnt_a_alignment_errors_get, + }, + { + .str = "a_octets_transmitted_ok", + .getter = mlxsw_reg_ppcnt_a_octets_transmitted_ok_get, + }, + { + .str = "a_octets_received_ok", + .getter = mlxsw_reg_ppcnt_a_octets_received_ok_get, + }, + { + .str = "a_multicast_frames_xmitted_ok", + .getter = mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get, + }, + { + .str = "a_broadcast_frames_xmitted_ok", + .getter = mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get, + }, + { + .str = "a_multicast_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get, + }, + { + .str = "a_broadcast_frames_received_ok", + .getter = mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get, + }, + { + .str = "a_in_range_length_errors", + .getter = mlxsw_reg_ppcnt_a_in_range_length_errors_get, + }, + { + .str = "a_out_of_range_length_field", + .getter = mlxsw_reg_ppcnt_a_out_of_range_length_field_get, + }, + { + .str = "a_frame_too_long_errors", + .getter = mlxsw_reg_ppcnt_a_frame_too_long_errors_get, + }, + { + .str = "a_symbol_error_during_carrier", + .getter = mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get, + }, + { + .str = "a_mac_control_frames_transmitted", + .getter = mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get, + }, + { + .str = "a_mac_control_frames_received", + .getter = mlxsw_reg_ppcnt_a_mac_control_frames_received_get, + }, + { + .str = "a_unsupported_opcodes_received", + .getter = mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get, + }, + { + .str = "a_pause_mac_ctrl_frames_received", + .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_received_get, + }, + { + .str = "a_pause_mac_ctrl_frames_xmitted", + .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_transmitted_get, + }, +}; + +#define MLXSW_SX_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sx_port_hw_stats) + +static void mlxsw_sx_port_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) { + memcpy(p, mlxsw_sx_port_hw_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static void mlxsw_sx_port_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int i; + int err; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl); + for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) + data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0; +} + +static int mlxsw_sx_port_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return MLXSW_SX_PORT_HW_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +struct mlxsw_sx_port_link_mode { + u32 mask; + u32 supported; + u32 advertised; + u32 speed; +}; + +static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, + .supported = SUPPORTED_100baseT_Full, + .advertised = ADVERTISED_100baseT_Full, + .speed = 100, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, + .speed = 100, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | + MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, + .supported = SUPPORTED_10000baseT_Full, + .advertised = ADVERTISED_10000baseT_Full, + .speed = 10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR, + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, + .supported = SUPPORTED_20000baseKR2_Full, + .advertised = ADVERTISED_20000baseKR2_Full, + .speed = 20000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, + .supported = SUPPORTED_40000baseCR4_Full, + .advertised = ADVERTISED_40000baseCR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4, + .supported = SUPPORTED_40000baseKR4_Full, + .advertised = ADVERTISED_40000baseKR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4, + .supported = SUPPORTED_40000baseSR4_Full, + .advertised = ADVERTISED_40000baseSR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4, + .supported = SUPPORTED_40000baseLR4_Full, + .advertised = ADVERTISED_40000baseLR4_Full, + .speed = 40000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .speed = 25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 | + MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, + .speed = 50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .supported = SUPPORTED_56000baseKR4_Full, + .advertised = ADVERTISED_56000baseKR4_Full, + .speed = 56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .speed = 100000, + }, +}; + +#define MLXSW_SX_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sx_port_link_mode) + +static u32 mlxsw_sx_from_ptys_supported_port(u32 ptys_eth_proto) +{ + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_SGMII)) + return SUPPORTED_FIBRE; + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) + return SUPPORTED_Backplane; + return 0; +} + +static u32 mlxsw_sx_from_ptys_supported_link(u32 ptys_eth_proto) +{ + u32 modes = 0; + int i; + + for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask) + modes |= mlxsw_sx_port_link_mode[i].supported; + } + return modes; +} + +static u32 mlxsw_sx_from_ptys_advert_link(u32 ptys_eth_proto) +{ + u32 modes = 0; + int i; + + for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask) + modes |= mlxsw_sx_port_link_mode[i].advertised; + } + return modes; +} + +static void mlxsw_sx_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, + struct ethtool_cmd *cmd) +{ + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + int i; + + if (!carrier_ok) + goto out; + + for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask) { + speed = mlxsw_sx_port_link_mode[i].speed; + duplex = DUPLEX_FULL; + break; + } + } +out: + ethtool_cmd_speed_set(cmd, speed); + cmd->duplex = duplex; +} + +static u8 mlxsw_sx_port_connector_port(u32 ptys_eth_proto) +{ + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | + MLXSW_REG_PTYS_ETH_SPEED_SGMII)) + return PORT_FIBRE; + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4)) + return PORT_DA; + + if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | + MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4)) + return PORT_NONE; + + return PORT_OTHER; +} + +static int mlxsw_sx_port_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_oper; + int err; + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); + if (err) { + netdev_err(dev, "Failed to get proto"); + return err; + } + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, + ð_proto_admin, ð_proto_oper); + + cmd->supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) | + mlxsw_sx_from_ptys_supported_link(eth_proto_cap) | + SUPPORTED_Pause | SUPPORTED_Asym_Pause; + cmd->advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_admin); + mlxsw_sx_from_ptys_speed_duplex(netif_carrier_ok(dev), + eth_proto_oper, cmd); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + cmd->port = mlxsw_sx_port_connector_port(eth_proto_oper); + cmd->lp_advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_oper); + + cmd->transceiver = XCVR_INTERNAL; + return 0; +} + +static u32 mlxsw_sx_to_ptys_advert_link(u32 advertising) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) { + if (advertising & mlxsw_sx_port_link_mode[i].advertised) + ptys_proto |= mlxsw_sx_port_link_mode[i].mask; + } + return ptys_proto; +} + +static u32 mlxsw_sx_to_ptys_speed(u32 speed) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) { + if (speed == mlxsw_sx_port_link_mode[i].speed) + ptys_proto |= mlxsw_sx_port_link_mode[i].mask; + } + return ptys_proto; +} + +static int mlxsw_sx_port_set_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 speed; + u32 eth_proto_new; + u32 eth_proto_cap; + u32 eth_proto_admin; + bool is_up; + int err; + + speed = ethtool_cmd_speed(cmd); + + eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ? + mlxsw_sx_to_ptys_advert_link(cmd->advertising) : + mlxsw_sx_to_ptys_speed(speed); + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); + if (err) { + netdev_err(dev, "Failed to get proto"); + return err; + } + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, NULL); + + eth_proto_new = eth_proto_new & eth_proto_cap; + if (!eth_proto_new) { + netdev_err(dev, "Not supported proto admin requested"); + return -EINVAL; + } + if (eth_proto_new == eth_proto_admin) + return 0; + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, eth_proto_new); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); + if (err) { + netdev_err(dev, "Failed to set proto admin"); + return err; + } + + err = mlxsw_sx_port_oper_status_get(mlxsw_sx_port, &is_up); + if (err) { + netdev_err(dev, "Failed to get oper status"); + return err; + } + if (!is_up) + return 0; + + err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false); + if (err) { + netdev_err(dev, "Failed to set admin status"); + return err; + } + + err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true); + if (err) { + netdev_err(dev, "Failed to set admin status"); + return err; + } + + return 0; +} + +static const struct ethtool_ops mlxsw_sx_port_ethtool_ops = { + .get_drvinfo = mlxsw_sx_port_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlxsw_sx_port_get_strings, + .get_ethtool_stats = mlxsw_sx_port_get_stats, + .get_sset_count = mlxsw_sx_port_get_sset_count, + .get_settings = mlxsw_sx_port_get_settings, + .set_settings = mlxsw_sx_port_set_settings, +}; + +static int mlxsw_sx_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id); + memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = { + .switchdev_port_attr_get = mlxsw_sx_port_attr_get, +}; + +static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx) +{ + char spad_pl[MLXSW_REG_SPAD_LEN]; + int err; + + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(spad), spad_pl); + if (err) + return err; + mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sx->hw_id); + return 0; +} + +static int mlxsw_sx_port_dev_addr_get(struct mlxsw_sx_port *mlxsw_sx_port) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + struct net_device *dev = mlxsw_sx_port->dev; + char ppad_pl[MLXSW_REG_PPAD_LEN]; + int err; + + mlxsw_reg_ppad_pack(ppad_pl, false, 0); + err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppad), ppad_pl); + if (err) + return err; + mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, dev->dev_addr); + /* The last byte value in base mac address is guaranteed + * to be such it does not overflow when adding local_port + * value. + */ + dev->dev_addr[ETH_ALEN - 1] += mlxsw_sx_port->local_port; + return 0; +} + +static int mlxsw_sx_port_stp_state_set(struct mlxsw_sx_port *mlxsw_sx_port, + u16 vid, enum mlxsw_reg_spms_state state) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char *spms_pl; + int err; + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sx_port->local_port); + mlxsw_reg_spms_vid_pack(spms_pl, vid, state); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sx_port_speed_set(struct mlxsw_sx_port *mlxsw_sx_port, + u32 speed) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, speed); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl); +} + +static int +mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port, + enum mlxsw_reg_spmlr_learn_mode mode) +{ + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + char spmlr_pl[MLXSW_REG_SPMLR_LEN]; + + mlxsw_reg_spmlr_pack(spmlr_pl, mlxsw_sx_port->local_port, mode); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spmlr), spmlr_pl); +} + +static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) +{ + struct mlxsw_sx_port *mlxsw_sx_port; + struct net_device *dev; + bool usable; + int err; + + dev = alloc_etherdev(sizeof(struct mlxsw_sx_port)); + if (!dev) + return -ENOMEM; + mlxsw_sx_port = netdev_priv(dev); + mlxsw_sx_port->dev = dev; + mlxsw_sx_port->mlxsw_sx = mlxsw_sx; + mlxsw_sx_port->local_port = local_port; + + mlxsw_sx_port->pcpu_stats = + netdev_alloc_pcpu_stats(struct mlxsw_sx_port_pcpu_stats); + if (!mlxsw_sx_port->pcpu_stats) { + err = -ENOMEM; + goto err_alloc_stats; + } + + dev->netdev_ops = &mlxsw_sx_port_netdev_ops; + dev->ethtool_ops = &mlxsw_sx_port_ethtool_ops; + dev->switchdev_ops = &mlxsw_sx_port_switchdev_ops; + + err = mlxsw_sx_port_dev_addr_get(mlxsw_sx_port); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Unable to get port mac address\n", + mlxsw_sx_port->local_port); + goto err_dev_addr_get; + } + + netif_carrier_off(dev); + + dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | + NETIF_F_VLAN_CHALLENGED; + + /* Each packet needs to have a Tx header (metadata) on top all other + * headers. + */ + dev->hard_header_len += MLXSW_TXHDR_LEN; + + err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to check module\n", + mlxsw_sx_port->local_port); + goto err_port_module_check; + } + + if (!usable) { + dev_dbg(mlxsw_sx->bus_info->dev, "Port %d: Not usable, skipping initialization\n", + mlxsw_sx_port->local_port); + goto port_not_usable; + } + + err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", + mlxsw_sx_port->local_port); + goto err_port_system_port_mapping_set; + } + + err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 0); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n", + mlxsw_sx_port->local_port); + goto err_port_swid_set; + } + + err = mlxsw_sx_port_speed_set(mlxsw_sx_port, + MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n", + mlxsw_sx_port->local_port); + goto err_port_speed_set; + } + + err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, ETH_DATA_LEN); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n", + mlxsw_sx_port->local_port); + goto err_port_mtu_set; + } + + err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false); + if (err) + goto err_port_admin_status_set; + + err = mlxsw_sx_port_stp_state_set(mlxsw_sx_port, + MLXSW_PORT_DEFAULT_VID, + MLXSW_REG_SPMS_STATE_FORWARDING); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set STP state\n", + mlxsw_sx_port->local_port); + goto err_port_stp_state_set; + } + + err = mlxsw_sx_port_mac_learning_mode_set(mlxsw_sx_port, + MLXSW_REG_SPMLR_LEARN_MODE_DISABLE); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MAC learning mode\n", + mlxsw_sx_port->local_port); + goto err_port_mac_learning_mode_set; + } + + err = register_netdev(dev); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to register netdev\n", + mlxsw_sx_port->local_port); + goto err_register_netdev; + } + + mlxsw_sx->ports[local_port] = mlxsw_sx_port; + return 0; + +err_register_netdev: +err_port_mac_learning_mode_set: +err_port_stp_state_set: +err_port_admin_status_set: +err_port_mtu_set: +err_port_speed_set: +err_port_swid_set: +err_port_system_port_mapping_set: +port_not_usable: +err_port_module_check: +err_dev_addr_get: + free_percpu(mlxsw_sx_port->pcpu_stats); +err_alloc_stats: + free_netdev(dev); + return err; +} + +static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) +{ + struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; + + if (!mlxsw_sx_port) + return; + unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */ + mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); + free_percpu(mlxsw_sx_port->pcpu_stats); + free_netdev(mlxsw_sx_port->dev); +} + +static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) +{ + int i; + + for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + mlxsw_sx_port_remove(mlxsw_sx, i); + kfree(mlxsw_sx->ports); +} + +static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx) +{ + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(struct mlxsw_sx_port *) * MLXSW_PORT_MAX_PORTS; + mlxsw_sx->ports = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_sx->ports) + return -ENOMEM; + + for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { + err = mlxsw_sx_port_create(mlxsw_sx, i); + if (err) + goto err_port_create; + } + return 0; + +err_port_create: + for (i--; i >= 1; i--) + mlxsw_sx_port_remove(mlxsw_sx, i); + kfree(mlxsw_sx->ports); + return err; +} + +static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg, + char *pude_pl, void *priv) +{ + struct mlxsw_sx *mlxsw_sx = priv; + struct mlxsw_sx_port *mlxsw_sx_port; + enum mlxsw_reg_pude_oper_status status; + u8 local_port; + + local_port = mlxsw_reg_pude_local_port_get(pude_pl); + mlxsw_sx_port = mlxsw_sx->ports[local_port]; + if (!mlxsw_sx_port) { + dev_warn(mlxsw_sx->bus_info->dev, "Port %d: Link event received for non-existent port\n", + local_port); + return; + } + + status = mlxsw_reg_pude_oper_status_get(pude_pl); + if (status == MLXSW_PORT_OPER_STATUS_UP) { + netdev_info(mlxsw_sx_port->dev, "link up\n"); + netif_carrier_on(mlxsw_sx_port->dev); + } else { + netdev_info(mlxsw_sx_port->dev, "link down\n"); + netif_carrier_off(mlxsw_sx_port->dev); + } +} + +static struct mlxsw_event_listener mlxsw_sx_pude_event = { + .func = mlxsw_sx_pude_event_func, + .trap_id = MLXSW_TRAP_ID_PUDE, +}; + +static int mlxsw_sx_event_register(struct mlxsw_sx *mlxsw_sx, + enum mlxsw_event_trap_id trap_id) +{ + struct mlxsw_event_listener *el; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int err; + + switch (trap_id) { + case MLXSW_TRAP_ID_PUDE: + el = &mlxsw_sx_pude_event; + break; + } + err = mlxsw_core_event_listener_register(mlxsw_sx->core, el, mlxsw_sx); + if (err) + return err; + + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + goto err_event_trap_set; + + return 0; + +err_event_trap_set: + mlxsw_core_event_listener_unregister(mlxsw_sx->core, el, mlxsw_sx); + return err; +} + +static void mlxsw_sx_event_unregister(struct mlxsw_sx *mlxsw_sx, + enum mlxsw_event_trap_id trap_id) +{ + struct mlxsw_event_listener *el; + + switch (trap_id) { + case MLXSW_TRAP_ID_PUDE: + el = &mlxsw_sx_pude_event; + break; + } + mlxsw_core_event_listener_unregister(mlxsw_sx->core, el, mlxsw_sx); +} + +static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sx *mlxsw_sx = priv; + struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; + struct mlxsw_sx_port_pcpu_stats *pcpu_stats; + + if (unlikely(!mlxsw_sx_port)) { + dev_warn_ratelimited(mlxsw_sx->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + return; + } + + skb->dev = mlxsw_sx_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + netif_receive_skb(skb); +} + +static const struct mlxsw_rx_listener mlxsw_sx_rx_listener[] = { + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_FDB_MC, + }, + /* Traps for specific L2 packet types, not trapped as FDB MC */ + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_STP, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LACP, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_EAPOL, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LLDP, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MMRP, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MVRP, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_RPVST, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_DHCP, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_QUERY, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE, + }, + { + .func = mlxsw_sx_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, + }, +}; + +static int mlxsw_sx_traps_init(struct mlxsw_sx *mlxsw_sx) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int i; + int err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_RX); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_CTRL); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sx_rx_listener); i++) { + err = mlxsw_core_rx_listener_register(mlxsw_sx->core, + &mlxsw_sx_rx_listener[i], + mlxsw_sx); + if (err) + goto err_rx_listener_register; + + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + mlxsw_sx_rx_listener[i].trap_id); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl); + if (err) + goto err_rx_trap_set; + } + return 0; + +err_rx_trap_set: + mlxsw_core_rx_listener_unregister(mlxsw_sx->core, + &mlxsw_sx_rx_listener[i], + mlxsw_sx); +err_rx_listener_register: + for (i--; i >= 0; i--) { + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_sx_rx_listener[i].trap_id); + mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl); + + mlxsw_core_rx_listener_unregister(mlxsw_sx->core, + &mlxsw_sx_rx_listener[i], + mlxsw_sx); + } + return err; +} + +static void mlxsw_sx_traps_fini(struct mlxsw_sx *mlxsw_sx) +{ + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sx_rx_listener); i++) { + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_sx_rx_listener[i].trap_id); + mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl); + + mlxsw_core_rx_listener_unregister(mlxsw_sx->core, + &mlxsw_sx_rx_listener[i], + mlxsw_sx); + } +} + +static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx) +{ + char sfgc_pl[MLXSW_REG_SFGC_LEN]; + char sgcr_pl[MLXSW_REG_SGCR_LEN]; + char *sftr_pl; + int err; + + /* Configure a flooding table, which includes only CPU port. */ + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + mlxsw_reg_sftr_pack(sftr_pl, 0, 0, MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, 0, + MLXSW_PORT_CPU_PORT, true); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sftr), sftr_pl); + kfree(sftr_pl); + if (err) + return err; + + /* Flood different packet types using the flooding table. */ + mlxsw_reg_sfgc_pack(sfgc_pl, + MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST, + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, + 0); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + return err; + + mlxsw_reg_sfgc_pack(sfgc_pl, + MLXSW_REG_SFGC_TYPE_BROADCAST, + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, + 0); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + return err; + + mlxsw_reg_sfgc_pack(sfgc_pl, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP, + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, + 0); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + return err; + + mlxsw_reg_sfgc_pack(sfgc_pl, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6, + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, + 0); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + return err; + + mlxsw_reg_sfgc_pack(sfgc_pl, + MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4, + MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID, + MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, + 0); + err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl); + if (err) + return err; + + mlxsw_reg_sgcr_pack(sgcr_pl, true); + return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl); +} + +static int mlxsw_sx_init(void *priv, struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info) +{ + struct mlxsw_sx *mlxsw_sx = priv; + int err; + + mlxsw_sx->core = mlxsw_core; + mlxsw_sx->bus_info = mlxsw_bus_info; + + err = mlxsw_sx_hw_id_get(mlxsw_sx); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Failed to get switch HW ID\n"); + return err; + } + + err = mlxsw_sx_ports_create(mlxsw_sx); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Failed to create ports\n"); + return err; + } + + err = mlxsw_sx_event_register(mlxsw_sx, MLXSW_TRAP_ID_PUDE); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Failed to register for PUDE events\n"); + goto err_event_register; + } + + err = mlxsw_sx_traps_init(mlxsw_sx); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps for RX\n"); + goto err_rx_listener_register; + } + + err = mlxsw_sx_flood_init(mlxsw_sx); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Failed to initialize flood tables\n"); + goto err_flood_init; + } + + return 0; + +err_flood_init: + mlxsw_sx_traps_fini(mlxsw_sx); +err_rx_listener_register: + mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE); +err_event_register: + mlxsw_sx_ports_remove(mlxsw_sx); + return err; +} + +static void mlxsw_sx_fini(void *priv) +{ + struct mlxsw_sx *mlxsw_sx = priv; + + mlxsw_sx_traps_fini(mlxsw_sx); + mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE); + mlxsw_sx_ports_remove(mlxsw_sx); +} + +static struct mlxsw_config_profile mlxsw_sx_config_profile = { + .used_max_vepa_channels = 1, + .max_vepa_channels = 0, + .used_max_lag = 1, + .max_lag = 64, + .used_max_port_per_lag = 1, + .max_port_per_lag = 16, + .used_max_mid = 1, + .max_mid = 7000, + .used_max_pgt = 1, + .max_pgt = 0, + .used_max_system_port = 1, + .max_system_port = 48000, + .used_max_vlan_groups = 1, + .max_vlan_groups = 127, + .used_max_regions = 1, + .max_regions = 400, + .used_flood_tables = 1, + .max_flood_tables = 2, + .max_vid_flood_tables = 1, + .used_flood_mode = 1, + .flood_mode = 3, + .used_max_ib_mc = 1, + .max_ib_mc = 0, + .used_max_pkey = 1, + .max_pkey = 0, + .swid_config = { + { + .used_type = 1, + .type = MLXSW_PORT_SWID_TYPE_ETH, + } + }, +}; + +static struct mlxsw_driver mlxsw_sx_driver = { + .kind = MLXSW_DEVICE_KIND_SWITCHX2, + .owner = THIS_MODULE, + .priv_size = sizeof(struct mlxsw_sx), + .init = mlxsw_sx_init, + .fini = mlxsw_sx_fini, + .txhdr_construct = mlxsw_sx_txhdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sx_config_profile, +}; + +static int __init mlxsw_sx_module_init(void) +{ + return mlxsw_core_driver_register(&mlxsw_sx_driver); +} + +static void __exit mlxsw_sx_module_exit(void) +{ + mlxsw_core_driver_unregister(&mlxsw_sx_driver); +} + +module_init(mlxsw_sx_module_init); +module_exit(mlxsw_sx_module_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox SwitchX-2 driver"); +MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SWITCHX2); diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/trap.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/trap.h new file mode 100644 index 000000000..53a9550be --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -0,0 +1,66 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/trap.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MLXSW_TRAP_H +#define _MLXSW_TRAP_H + +enum { + /* Ethernet EMAD and FDB miss */ + MLXSW_TRAP_ID_FDB_MC = 0x01, + MLXSW_TRAP_ID_ETHEMAD = 0x05, + /* L2 traps for specific packet types */ + MLXSW_TRAP_ID_STP = 0x10, + MLXSW_TRAP_ID_LACP = 0x11, + MLXSW_TRAP_ID_EAPOL = 0x12, + MLXSW_TRAP_ID_LLDP = 0x13, + MLXSW_TRAP_ID_MMRP = 0x14, + MLXSW_TRAP_ID_MVRP = 0x15, + MLXSW_TRAP_ID_RPVST = 0x16, + MLXSW_TRAP_ID_DHCP = 0x19, + MLXSW_TRAP_ID_IGMP_QUERY = 0x30, + MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31, + MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, + MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, + MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, + + MLXSW_TRAP_ID_MAX = 0x1FF +}; + +enum mlxsw_event_trap_id { + /* Port Up/Down event generated by hardware */ + MLXSW_TRAP_ID_PUDE = 0x8, +}; + +#endif /* _MLXSW_TRAP_H */ diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/txheader.h b/kernel/drivers/net/ethernet/mellanox/mlxsw/txheader.h new file mode 100644 index 000000000..fdf94720c --- /dev/null +++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/txheader.h @@ -0,0 +1,81 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/txheader.h + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_TXHEADER_H +#define _MLXSW_TXHEADER_H + +#define MLXSW_TXHDR_LEN 0x10 +#define MLXSW_TXHDR_VERSION_0 0 +#define MLXSW_TXHDR_VERSION_1 1 + +enum { + MLXSW_TXHDR_ETH_CTL, + MLXSW_TXHDR_ETH_DATA, +}; + +#define MLXSW_TXHDR_PROTO_ETH 1 + +enum { + MLXSW_TXHDR_ETCLASS_0, + MLXSW_TXHDR_ETCLASS_1, + MLXSW_TXHDR_ETCLASS_2, + MLXSW_TXHDR_ETCLASS_3, + MLXSW_TXHDR_ETCLASS_4, + MLXSW_TXHDR_ETCLASS_5, + MLXSW_TXHDR_ETCLASS_6, + MLXSW_TXHDR_ETCLASS_7, +}; + +enum { + MLXSW_TXHDR_RDQ_OTHER, + MLXSW_TXHDR_RDQ_EMAD = 0x1f, +}; + +#define MLXSW_TXHDR_CTCLASS3 0 +#define MLXSW_TXHDR_CPU_SIG 0 +#define MLXSW_TXHDR_SIG 0xE0E0 +#define MLXSW_TXHDR_STCLASS_NONE 0 + +enum { + MLXSW_TXHDR_NOT_EMAD, + MLXSW_TXHDR_EMAD, +}; + +enum { + MLXSW_TXHDR_TYPE_DATA, + MLXSW_TXHDR_TYPE_CONTROL = 6, +}; + +#endif |