summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/net/hyperv
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/net/hyperv
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/net/hyperv')
-rw-r--r--kernel/drivers/net/hyperv/hyperv_net.h43
-rw-r--r--kernel/drivers/net/hyperv/netvsc.c98
-rw-r--r--kernel/drivers/net/hyperv/netvsc_drv.c261
-rw-r--r--kernel/drivers/net/hyperv/rndis_filter.c52
4 files changed, 403 insertions, 51 deletions
diff --git a/kernel/drivers/net/hyperv/hyperv_net.h b/kernel/drivers/net/hyperv/hyperv_net.h
index 41071d32b..5fa98f599 100644
--- a/kernel/drivers/net/hyperv/hyperv_net.h
+++ b/kernel/drivers/net/hyperv/hyperv_net.h
@@ -161,6 +161,8 @@ struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
bool link_state; /* 0 - link up, 1 - link down */
int ring_size;
+ u32 max_num_vrss_chns;
+ u32 num_chn;
};
enum rndis_device_state {
@@ -540,6 +542,29 @@ union nvsp_2_message_uber {
struct nvsp_2_free_rxbuf free_rxbuf;
} __packed;
+struct nvsp_4_send_vf_association {
+ /* 1: allocated, serial number is valid. 0: not allocated */
+ u32 allocated;
+
+ /* Serial number of the VF to team with */
+ u32 serial;
+} __packed;
+
+enum nvsp_vm_datapath {
+ NVSP_DATAPATH_SYNTHETIC = 0,
+ NVSP_DATAPATH_VF,
+ NVSP_DATAPATH_MAX
+};
+
+struct nvsp_4_sw_datapath {
+ u32 active_datapath; /* active data path in VM */
+} __packed;
+
+union nvsp_4_message_uber {
+ struct nvsp_4_send_vf_association vf_assoc;
+ struct nvsp_4_sw_datapath active_dp;
+} __packed;
+
enum nvsp_subchannel_operation {
NVSP_SUBCHANNEL_NONE = 0,
NVSP_SUBCHANNEL_ALLOCATE,
@@ -577,6 +602,7 @@ union nvsp_all_messages {
union nvsp_message_init_uber init_msg;
union nvsp_1_message_uber v1_msg;
union nvsp_2_message_uber v2_msg;
+ union nvsp_4_message_uber v4_msg;
union nvsp_5_message_uber v5_msg;
} __packed;
@@ -588,6 +614,7 @@ struct nvsp_message {
#define NETVSC_MTU 65536
+#define NETVSC_MTU_MIN 68
#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
@@ -611,6 +638,12 @@ struct multi_send_data {
u32 count; /* counter of batched packets */
};
+struct netvsc_stats {
+ u64 packets;
+ u64 bytes;
+ struct u64_stats_sync syncp;
+};
+
/* The context of the netvsc device */
struct net_device_context {
/* point back to our device context */
@@ -618,6 +651,9 @@ struct net_device_context {
struct delayed_work dwork;
struct work_struct work;
u32 msg_enable; /* debug level */
+
+ struct netvsc_stats __percpu *tx_stats;
+ struct netvsc_stats __percpu *rx_stats;
};
/* Per netvsc device */
@@ -660,6 +696,8 @@ struct netvsc_device {
u32 send_table[VRSS_SEND_TAB_SIZE];
u32 max_chn;
u32 num_chn;
+ spinlock_t sc_lock; /* Protects num_sc_offered variable */
+ u32 num_sc_offered;
atomic_t queue_sends[NR_CPUS];
/* Holds rndis device info */
@@ -678,6 +716,11 @@ struct netvsc_device {
/* The net device context */
struct net_device_context *nd_ctx;
+
+ /* 1: allocated, serial number is valid. 0: not allocated */
+ u32 vf_alloc;
+ /* Serial number of the VF to team with */
+ u32 vf_serial;
};
/* NdisInitialize message */
diff --git a/kernel/drivers/net/hyperv/netvsc.c b/kernel/drivers/net/hyperv/netvsc.c
index ea091bc5f..51e4c0fd0 100644
--- a/kernel/drivers/net/hyperv/netvsc.c
+++ b/kernel/drivers/net/hyperv/netvsc.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <linux/vmalloc.h>
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
@@ -227,13 +228,18 @@ static int netvsc_init_buf(struct hv_device *device)
struct netvsc_device *net_device;
struct nvsp_message *init_packet;
struct net_device *ndev;
+ int node;
net_device = get_outbound_net_device(device);
if (!net_device)
return -ENODEV;
ndev = net_device->ndev;
- net_device->recv_buf = vzalloc(net_device->recv_buf_size);
+ node = cpu_to_node(device->channel->target_cpu);
+ net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
+ if (!net_device->recv_buf)
+ net_device->recv_buf = vzalloc(net_device->recv_buf_size);
+
if (!net_device->recv_buf) {
netdev_err(ndev, "unable to allocate receive "
"buffer of size %d\n", net_device->recv_buf_size);
@@ -321,7 +327,9 @@ static int netvsc_init_buf(struct hv_device *device)
/* Now setup the send buffer.
*/
- net_device->send_buf = vzalloc(net_device->send_buf_size);
+ net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
+ if (!net_device->send_buf)
+ net_device->send_buf = vzalloc(net_device->send_buf_size);
if (!net_device->send_buf) {
netdev_err(ndev, "unable to allocate send "
"buffer of size %d\n", net_device->send_buf_size);
@@ -445,13 +453,16 @@ static int negotiate_nvsp_ver(struct hv_device *device,
if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
return 0;
- /* NVSPv2 only: Send NDIS config */
+ /* NVSPv2 or later: Send NDIS config */
memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu +
ETH_HLEN;
init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
+ if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5)
+ init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
+
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
@@ -743,6 +754,7 @@ static inline int netvsc_send_pkt(
u64 req_id;
int ret;
struct hv_page_buffer *pgbuf;
+ u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (packet->is_data_pkt) {
@@ -769,32 +781,42 @@ static inline int netvsc_send_pkt(
if (out_channel->rescind)
return -ENODEV;
+ /*
+ * It is possible that once we successfully place this packet
+ * on the ringbuffer, we may stop the queue. In that case, we want
+ * to notify the host independent of the xmit_more flag. We don't
+ * need to be precise here; in the worst case we may signal the host
+ * unnecessarily.
+ */
+ if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1))
+ packet->xmit_more = false;
+
if (packet->page_buf_cnt) {
pgbuf = packet->cp_partial ? packet->page_buf +
packet->rmsg_pgcnt : packet->page_buf;
- ret = vmbus_sendpacket_pagebuffer(out_channel,
- pgbuf,
- packet->page_buf_cnt,
- &nvmsg,
- sizeof(struct nvsp_message),
- req_id);
+ ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
+ pgbuf,
+ packet->page_buf_cnt,
+ &nvmsg,
+ sizeof(struct nvsp_message),
+ req_id,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
+ !packet->xmit_more);
} else {
- ret = vmbus_sendpacket(
- out_channel, &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
+ sizeof(struct nvsp_message),
+ req_id,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
+ !packet->xmit_more);
}
if (ret == 0) {
atomic_inc(&net_device->num_outstanding_sends);
atomic_inc(&net_device->queue_sends[q_idx]);
- if (hv_ringbuf_avail_percent(&out_channel->outbound) <
- RING_AVAIL_PERCENT_LOWATER) {
- netif_tx_stop_queue(netdev_get_tx_queue(
- ndev, q_idx));
+ if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
+ netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx));
if (atomic_read(&net_device->
queue_sends[q_idx]) < 1)
@@ -1045,11 +1067,10 @@ static void netvsc_receive(struct netvsc_device *net_device,
static void netvsc_send_table(struct hv_device *hdev,
- struct vmpacket_descriptor *vmpkt)
+ struct nvsp_message *nvmsg)
{
struct netvsc_device *nvscdev;
struct net_device *ndev;
- struct nvsp_message *nvmsg;
int i;
u32 count, *tab;
@@ -1058,12 +1079,6 @@ static void netvsc_send_table(struct hv_device *hdev,
return;
ndev = nvscdev->ndev;
- nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
- (vmpkt->offset8 << 3));
-
- if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
- return;
-
count = nvmsg->msg.v5_msg.send_table.count;
if (count != VRSS_SEND_TAB_SIZE) {
netdev_err(ndev, "Received wrong send-table size:%u\n", count);
@@ -1077,6 +1092,28 @@ static void netvsc_send_table(struct hv_device *hdev,
nvscdev->send_table[i] = tab[i];
}
+static void netvsc_send_vf(struct netvsc_device *nvdev,
+ struct nvsp_message *nvmsg)
+{
+ nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+ nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+}
+
+static inline void netvsc_receive_inband(struct hv_device *hdev,
+ struct netvsc_device *nvdev,
+ struct nvsp_message *nvmsg)
+{
+ switch (nvmsg->hdr.msg_type) {
+ case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
+ netvsc_send_table(hdev, nvmsg);
+ break;
+
+ case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
+ netvsc_send_vf(nvdev, nvmsg);
+ break;
+ }
+}
+
void netvsc_channel_cb(void *context)
{
int ret;
@@ -1089,6 +1126,7 @@ void netvsc_channel_cb(void *context)
unsigned char *buffer;
int bufferlen = NETVSC_PACKET_SIZE;
struct net_device *ndev;
+ struct nvsp_message *nvmsg;
if (channel->primary_channel != NULL)
device = channel->primary_channel->device_obj;
@@ -1107,6 +1145,8 @@ void netvsc_channel_cb(void *context)
if (ret == 0) {
if (bytes_recvd > 0) {
desc = (struct vmpacket_descriptor *)buffer;
+ nvmsg = (struct nvsp_message *)((unsigned long)
+ desc + (desc->offset8 << 3));
switch (desc->type) {
case VM_PKT_COMP:
netvsc_send_completion(net_device,
@@ -1119,7 +1159,9 @@ void netvsc_channel_cb(void *context)
break;
case VM_PKT_DATA_INBAND:
- netvsc_send_table(device, desc);
+ netvsc_receive_inband(device,
+ net_device,
+ nvmsg);
break;
default:
diff --git a/kernel/drivers/net/hyperv/netvsc_drv.c b/kernel/drivers/net/hyperv/netvsc_drv.c
index 5993c7e2d..409b48e1e 100644
--- a/kernel/drivers/net/hyperv/netvsc_drv.c
+++ b/kernel/drivers/net/hyperv/netvsc_drv.c
@@ -46,6 +46,8 @@ static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+static int max_num_vrss_chns = 8;
+
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
@@ -104,7 +106,7 @@ static int netvsc_open(struct net_device *net)
return ret;
}
- netif_tx_start_all_queues(net);
+ netif_tx_wake_all_queues(net);
nvdev = hv_get_drvdata(device_obj);
rdev = nvdev->extension;
@@ -118,15 +120,56 @@ static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *device_obj = net_device_ctx->device_ctx;
+ struct netvsc_device *nvdev = hv_get_drvdata(device_obj);
int ret;
+ u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20;
+ struct vmbus_channel *chn;
netif_tx_disable(net);
/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
cancel_work_sync(&net_device_ctx->work);
ret = rndis_filter_close(device_obj);
- if (ret != 0)
+ if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
+ return ret;
+ }
+
+ /* Ensure pending bytes in ring are read */
+ while (true) {
+ aread = 0;
+ for (i = 0; i < nvdev->num_chn; i++) {
+ chn = nvdev->chn_table[i];
+ if (!chn)
+ continue;
+
+ hv_get_ringbuffer_availbytes(&chn->inbound, &aread,
+ &awrite);
+
+ if (aread)
+ break;
+
+ hv_get_ringbuffer_availbytes(&chn->outbound, &aread,
+ &awrite);
+
+ if (aread)
+ break;
+ }
+
+ retry++;
+ if (retry > retry_max || aread == 0)
+ break;
+
+ msleep(msec);
+
+ if (msec < 1000)
+ msec *= 2;
+ }
+
+ if (aread) {
+ netdev_err(net, "Ring buffer not empty after closing rndis\n");
+ ret = -ETIMEDOUT;
+ }
return ret;
}
@@ -196,12 +239,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
struct flow_keys flow;
int data_len;
- if (!skb_flow_dissect(skb, &flow) ||
- !(flow.n_proto == htons(ETH_P_IP) ||
- flow.n_proto == htons(ETH_P_IPV6)))
+ if (!skb_flow_dissect_flow_keys(skb, &flow, 0) ||
+ !(flow.basic.n_proto == htons(ETH_P_IP) ||
+ flow.basic.n_proto == htons(ETH_P_IPV6)))
return false;
- if (flow.ip_proto == IPPROTO_TCP)
+ if (flow.basic.ip_proto == IPPROTO_TCP)
data_len = 12;
else
data_len = 8;
@@ -391,7 +434,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
u32 skb_length;
u32 pkt_sz;
struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
-
+ struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
/* We will atmost need two pages to describe the rndis
* header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -580,8 +623,10 @@ do_send:
drop:
if (ret == 0) {
- net->stats.tx_bytes += skb_length;
- net->stats.tx_packets++;
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->packets++;
+ tx_stats->bytes += skb_length;
+ u64_stats_update_end(&tx_stats->syncp);
} else {
if (ret != -EAGAIN) {
dev_kfree_skb_any(skb);
@@ -644,13 +689,17 @@ int netvsc_recv_callback(struct hv_device *device_obj,
struct ndis_tcp_ip_checksum_info *csum_info)
{
struct net_device *net;
+ struct net_device_context *net_device_ctx;
struct sk_buff *skb;
+ struct netvsc_stats *rx_stats;
net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
if (!net || net->reg_state != NETREG_REGISTERED) {
packet->status = NVSP_STAT_FAIL;
return 0;
}
+ net_device_ctx = netdev_priv(net);
+ rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
/* Allocate a skb - TODO direct I/O to pages? */
skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
@@ -686,8 +735,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
skb_record_rx_queue(skb, packet->channel->
offermsg.offer.sub_channel_index);
- net->stats.rx_packets++;
- net->stats.rx_bytes += packet->total_data_buflen;
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->packets++;
+ rx_stats->bytes += packet->total_data_buflen;
+ u64_stats_update_end(&rx_stats->syncp);
/*
* Pass the skb back up. Network stack will deallocate the skb when it
@@ -719,6 +770,104 @@ static void netvsc_get_channels(struct net_device *net,
}
}
+static int netvsc_set_channels(struct net_device *net,
+ struct ethtool_channels *channels)
+{
+ struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct hv_device *dev = net_device_ctx->device_ctx;
+ struct netvsc_device *nvdev = hv_get_drvdata(dev);
+ struct netvsc_device_info device_info;
+ u32 num_chn;
+ u32 max_chn;
+ int ret = 0;
+ bool recovering = false;
+
+ if (!nvdev || nvdev->destroy)
+ return -ENODEV;
+
+ num_chn = nvdev->num_chn;
+ max_chn = min_t(u32, nvdev->max_chn, num_online_cpus());
+
+ if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) {
+ pr_info("vRSS unsupported before NVSP Version 5\n");
+ return -EINVAL;
+ }
+
+ /* We do not support rx, tx, or other */
+ if (!channels ||
+ channels->rx_count ||
+ channels->tx_count ||
+ channels->other_count ||
+ (channels->combined_count < 1))
+ return -EINVAL;
+
+ if (channels->combined_count > max_chn) {
+ pr_info("combined channels too high, using %d\n", max_chn);
+ channels->combined_count = max_chn;
+ }
+
+ ret = netvsc_close(net);
+ if (ret)
+ goto out;
+
+ do_set:
+ nvdev->start_remove = true;
+ rndis_filter_device_remove(dev);
+
+ nvdev->num_chn = channels->combined_count;
+
+ net_device_ctx->device_ctx = dev;
+ hv_set_drvdata(dev, net);
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = nvdev->num_chn; /* passed to RNDIS */
+ device_info.ring_size = ring_size;
+ device_info.max_num_vrss_chns = max_num_vrss_chns;
+
+ ret = rndis_filter_device_add(dev, &device_info);
+ if (ret) {
+ if (recovering) {
+ netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+ return ret;
+ }
+ goto recover;
+ }
+
+ nvdev = hv_get_drvdata(dev);
+
+ ret = netif_set_real_num_tx_queues(net, nvdev->num_chn);
+ if (ret) {
+ if (recovering) {
+ netdev_err(net, "could not set tx queue count (ret %d)\n", ret);
+ return ret;
+ }
+ goto recover;
+ }
+
+ ret = netif_set_real_num_rx_queues(net, nvdev->num_chn);
+ if (ret) {
+ if (recovering) {
+ netdev_err(net, "could not set rx queue count (ret %d)\n", ret);
+ return ret;
+ }
+ goto recover;
+ }
+
+ out:
+ netvsc_open(net);
+
+ return ret;
+
+ recover:
+ /* If the above failed, we attempt to recover through the same
+ * process but with the original number of channels.
+ */
+ netdev_err(net, "could not set channels, recovering\n");
+ recovering = true;
+ channels->combined_count = num_chn;
+ goto do_set;
+}
+
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
@@ -726,6 +875,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
struct netvsc_device *nvdev = hv_get_drvdata(hdev);
struct netvsc_device_info device_info;
int limit = ETH_DATA_LEN;
+ int ret = 0;
if (nvdev == NULL || nvdev->destroy)
return -ENODEV;
@@ -733,26 +883,73 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
limit = NETVSC_MTU - ETH_HLEN;
- /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */
- if (mtu < ETH_DATA_LEN || mtu > limit)
+ if (mtu < NETVSC_MTU_MIN || mtu > limit)
return -EINVAL;
+ ret = netvsc_close(ndev);
+ if (ret)
+ goto out;
+
nvdev->start_remove = true;
- cancel_work_sync(&ndevctx->work);
- netif_tx_disable(ndev);
rndis_filter_device_remove(hdev);
ndev->mtu = mtu;
ndevctx->device_ctx = hdev;
hv_set_drvdata(hdev, ndev);
+
+ memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
+ device_info.num_chn = nvdev->num_chn;
+ device_info.max_num_vrss_chns = max_num_vrss_chns;
rndis_filter_device_add(hdev, &device_info);
- netif_tx_wake_all_queues(ndev);
- return 0;
+out:
+ netvsc_open(ndev);
+
+ return ret;
}
+static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
+ struct rtnl_link_stats64 *t)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
+ cpu);
+ struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
+ cpu);
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ tx_packets = tx_stats->packets;
+ tx_bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
+
+ do {
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ rx_packets = rx_stats->packets;
+ rx_bytes = rx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
+
+ t->tx_bytes += tx_bytes;
+ t->tx_packets += tx_packets;
+ t->rx_bytes += rx_bytes;
+ t->rx_packets += rx_packets;
+ }
+
+ t->tx_dropped = net->stats.tx_dropped;
+ t->tx_errors = net->stats.tx_dropped;
+
+ t->rx_dropped = net->stats.rx_dropped;
+ t->rx_errors = net->stats.rx_errors;
+
+ return t;
+}
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
@@ -793,6 +990,7 @@ static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_channels = netvsc_get_channels,
+ .set_channels = netvsc_set_channels,
};
static const struct net_device_ops device_ops = {
@@ -804,6 +1002,7 @@ static const struct net_device_ops device_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
.ndo_select_queue = netvsc_select_queue,
+ .ndo_get_stats64 = netvsc_get_stats64,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = netvsc_poll_controller,
#endif
@@ -855,6 +1054,14 @@ static void netvsc_link_change(struct work_struct *w)
netdev_notify_peers(net);
}
+static void netvsc_free_netdev(struct net_device *netdev)
+{
+ struct net_device_context *net_device_ctx = netdev_priv(netdev);
+
+ free_percpu(net_device_ctx->tx_stats);
+ free_percpu(net_device_ctx->rx_stats);
+ free_netdev(netdev);
+}
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
@@ -883,6 +1090,18 @@ static int netvsc_probe(struct hv_device *dev,
netdev_dbg(net, "netvsc msg_enable: %d\n",
net_device_ctx->msg_enable);
+ net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
+ if (!net_device_ctx->tx_stats) {
+ free_netdev(net);
+ return -ENOMEM;
+ }
+ net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
+ if (!net_device_ctx->rx_stats) {
+ free_percpu(net_device_ctx->tx_stats);
+ free_netdev(net);
+ return -ENOMEM;
+ }
+
hv_set_drvdata(dev, net);
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
@@ -905,11 +1124,13 @@ static int netvsc_probe(struct hv_device *dev,
net->needed_headroom = max_needed_headroom;
/* Notify the netvsc driver of the new device */
+ memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
+ device_info.max_num_vrss_chns = max_num_vrss_chns;
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- free_netdev(net);
+ netvsc_free_netdev(net);
hv_set_drvdata(dev, NULL);
return ret;
}
@@ -923,7 +1144,7 @@ static int netvsc_probe(struct hv_device *dev,
if (ret != 0) {
pr_err("Unable to register netdev.\n");
rndis_filter_device_remove(dev);
- free_netdev(net);
+ netvsc_free_netdev(net);
} else {
schedule_delayed_work(&net_device_ctx->dwork, 0);
}
@@ -962,7 +1183,7 @@ static int netvsc_remove(struct hv_device *dev)
*/
rndis_filter_device_remove(dev);
- free_netdev(net);
+ netvsc_free_netdev(net);
return 0;
}
diff --git a/kernel/drivers/net/hyperv/rndis_filter.c b/kernel/drivers/net/hyperv/rndis_filter.c
index 9118cea91..5931a799a 100644
--- a/kernel/drivers/net/hyperv/rndis_filter.c
+++ b/kernel/drivers/net/hyperv/rndis_filter.c
@@ -27,6 +27,7 @@
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/nls.h>
+#include <linux/vmalloc.h>
#include "hyperv_net.h"
@@ -983,9 +984,16 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
struct netvsc_device *nvscdev;
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
int ret;
+ unsigned long flags;
nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
+ spin_lock_irqsave(&nvscdev->sc_lock, flags);
+ nvscdev->num_sc_offered--;
+ spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
+ if (nvscdev->num_sc_offered == 0)
+ complete(&nvscdev->channel_init_wait);
+
if (chn_index >= nvscdev->num_chn)
return;
@@ -1013,6 +1021,11 @@ int rndis_filter_device_add(struct hv_device *dev,
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
u32 mtu, size;
+ u32 num_rss_qs;
+ u32 sc_delta;
+ const struct cpumask *node_cpu_mask;
+ u32 num_possible_rss_qs;
+ unsigned long flags;
rndis_device = get_rndis_device();
if (!rndis_device)
@@ -1035,6 +1048,8 @@ int rndis_filter_device_add(struct hv_device *dev,
net_device->max_chn = 1;
net_device->num_chn = 1;
+ spin_lock_init(&net_device->sc_lock);
+
net_device->extension = rndis_device;
rndis_device->net_dev = net_device;
@@ -1050,7 +1065,7 @@ int rndis_filter_device_add(struct hv_device *dev,
ret = rndis_filter_query_device(rndis_device,
RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
&mtu, &size);
- if (ret == 0 && size == sizeof(u32))
+ if (ret == 0 && size == sizeof(u32) && mtu < net_device->ndev->mtu)
net_device->ndev->mtu = mtu;
/* Get the mac address */
@@ -1100,9 +1115,26 @@ int rndis_filter_device_add(struct hv_device *dev,
if (ret || rsscap.num_recv_que < 2)
goto out;
+ num_rss_qs = min(device_info->max_num_vrss_chns, rsscap.num_recv_que);
+
net_device->max_chn = rsscap.num_recv_que;
- net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
- num_online_cpus() : rsscap.num_recv_que;
+
+ /*
+ * We will limit the VRSS channels to the number CPUs in the NUMA node
+ * the primary channel is currently bound to.
+ */
+ node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
+ num_possible_rss_qs = cpumask_weight(node_cpu_mask);
+
+ /* We will use the given number of channels if available. */
+ if (device_info->num_chn && device_info->num_chn < net_device->max_chn)
+ net_device->num_chn = device_info->num_chn;
+ else
+ net_device->num_chn = min(num_possible_rss_qs, num_rss_qs);
+
+ num_rss_qs = net_device->num_chn - 1;
+ net_device->num_sc_offered = num_rss_qs;
+
if (net_device->num_chn == 1)
goto out;
@@ -1144,11 +1176,25 @@ int rndis_filter_device_add(struct hv_device *dev,
ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
+ /*
+ * Wait for the host to send us the sub-channel offers.
+ */
+ spin_lock_irqsave(&net_device->sc_lock, flags);
+ sc_delta = num_rss_qs - (net_device->num_chn - 1);
+ net_device->num_sc_offered -= sc_delta;
+ spin_unlock_irqrestore(&net_device->sc_lock, flags);
+
+ while (net_device->num_sc_offered != 0) {
+ t = wait_for_completion_timeout(&net_device->channel_init_wait, 10*HZ);
+ if (t == 0)
+ WARN(1, "Netvsc: Waiting for sub-channel processing");
+ }
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
}
+
return 0; /* return 0 because primary channel can be used alone */
err_dev_remv: