diff options
Diffstat (limited to 'kernel/drivers/net/hyperv')
-rw-r--r-- | kernel/drivers/net/hyperv/Kconfig | 5 | ||||
-rw-r--r-- | kernel/drivers/net/hyperv/Makefile | 3 | ||||
-rw-r--r-- | kernel/drivers/net/hyperv/hyperv_net.h | 1221 | ||||
-rw-r--r-- | kernel/drivers/net/hyperv/netvsc.c | 1231 | ||||
-rw-r--r-- | kernel/drivers/net/hyperv/netvsc_drv.c | 1004 | ||||
-rw-r--r-- | kernel/drivers/net/hyperv/rndis_filter.c | 1192 |
6 files changed, 4656 insertions, 0 deletions
diff --git a/kernel/drivers/net/hyperv/Kconfig b/kernel/drivers/net/hyperv/Kconfig new file mode 100644 index 000000000..936968d23 --- /dev/null +++ b/kernel/drivers/net/hyperv/Kconfig @@ -0,0 +1,5 @@ +config HYPERV_NET + tristate "Microsoft Hyper-V virtual network driver" + depends on HYPERV + help + Select this option to enable the Hyper-V virtual network driver. diff --git a/kernel/drivers/net/hyperv/Makefile b/kernel/drivers/net/hyperv/Makefile new file mode 100644 index 000000000..c8a668271 --- /dev/null +++ b/kernel/drivers/net/hyperv/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o + +hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o diff --git a/kernel/drivers/net/hyperv/hyperv_net.h b/kernel/drivers/net/hyperv/hyperv_net.h new file mode 100644 index 000000000..41071d32b --- /dev/null +++ b/kernel/drivers/net/hyperv/hyperv_net.h @@ -0,0 +1,1221 @@ +/* + * + * Copyright (c) 2011, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + * K. Y. Srinivasan <kys@microsoft.com> + * + */ + +#ifndef _HYPERV_NET_H +#define _HYPERV_NET_H + +#include <linux/list.h> +#include <linux/hyperv.h> +#include <linux/rndis.h> + +/* RSS related */ +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */ + +#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 +#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 + +#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 +#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 + +struct ndis_obj_header { + u8 type; + u8 rev; + u16 size; +} __packed; + +/* ndis_recv_scale_cap/cap_flag */ +#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 +#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000 +#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 +#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 + +struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ + struct ndis_obj_header hdr; + u32 cap_flag; + u32 num_int_msg; + u32 num_recv_que; + u16 num_indirect_tabent; +} __packed; + + +/* ndis_recv_scale_param flags */ +#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 +#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 +#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 +#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 +#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 + +/* Hash info bits */ +#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001 +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) +#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 + +#define ITAB_NUM 128 +#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 +extern u8 netvsc_hash_key[]; + +struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ + struct ndis_obj_header hdr; + + /* Qualifies the rest of the information */ + u16 flag; + + /* The base CPU number to do receive processing. not used */ + u16 base_cpu_number; + + /* This describes the hash function and type being enabled */ + u32 hashinfo; + + /* The size of indirection table array */ + u16 indirect_tabsize; + + /* The offset of the indirection table from the beginning of this + * structure + */ + u32 indirect_taboffset; + + /* The size of the hash secret key */ + u16 hashkey_size; + + /* The offset of the secret key from the beginning of this structure */ + u32 kashkey_offset; + + u32 processor_masks_offset; + u32 num_processor_masks; + u32 processor_masks_entry_size; +}; + +/* Fwd declaration */ +struct ndis_tcp_ip_checksum_info; + +/* + * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame + * within the RNDIS + */ +struct hv_netvsc_packet { + /* Bookkeeping stuff */ + u32 status; + + bool is_data_pkt; + bool xmit_more; /* from skb */ + bool cp_partial; /* partial copy into send buffer */ + + u16 vlan_tci; + + u16 q_idx; + struct vmbus_channel *channel; + + u64 send_completion_tid; + void *send_completion_ctx; + void (*send_completion)(void *context); + + u32 send_buf_index; + + /* This points to the memory after page_buf */ + struct rndis_message *rndis_msg; + + u32 rmsg_size; /* RNDIS header and PPI size */ + u32 rmsg_pgcnt; /* page count of RNDIS header and PPI */ + + u32 total_data_buflen; + /* Points to the send/receive buffer where the ethernet frame is */ + void *data; + u32 page_buf_cnt; + struct hv_page_buffer *page_buf; +}; + +struct netvsc_device_info { + unsigned char mac_adr[ETH_ALEN]; + bool link_state; /* 0 - link up, 1 - link down */ + int ring_size; +}; + +enum rndis_device_state { + RNDIS_DEV_UNINITIALIZED = 0, + RNDIS_DEV_INITIALIZING, + RNDIS_DEV_INITIALIZED, + RNDIS_DEV_DATAINITIALIZED, +}; + +struct rndis_device { + struct netvsc_device *net_dev; + + enum rndis_device_state state; + bool link_state; + bool link_change; + atomic_t new_req_id; + + spinlock_t request_lock; + struct list_head req_list; + + unsigned char hw_mac_adr[ETH_ALEN]; +}; + + +/* Interface */ +int netvsc_device_add(struct hv_device *device, void *additional_info); +int netvsc_device_remove(struct hv_device *device); +int netvsc_send(struct hv_device *device, + struct hv_netvsc_packet *packet); +void netvsc_linkstatus_callback(struct hv_device *device_obj, + struct rndis_message *resp); +void netvsc_xmit_completion(void *context); +int netvsc_recv_callback(struct hv_device *device_obj, + struct hv_netvsc_packet *packet, + struct ndis_tcp_ip_checksum_info *csum_info); +void netvsc_channel_cb(void *context); +int rndis_filter_open(struct hv_device *dev); +int rndis_filter_close(struct hv_device *dev); +int rndis_filter_device_add(struct hv_device *dev, + void *additional_info); +void rndis_filter_device_remove(struct hv_device *dev); +int rndis_filter_receive(struct hv_device *dev, + struct hv_netvsc_packet *pkt); + +int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter); +int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac); + + +#define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF) + +#define NVSP_PROTOCOL_VERSION_1 2 +#define NVSP_PROTOCOL_VERSION_2 0x30002 +#define NVSP_PROTOCOL_VERSION_4 0x40000 +#define NVSP_PROTOCOL_VERSION_5 0x50000 + +enum { + NVSP_MSG_TYPE_NONE = 0, + + /* Init Messages */ + NVSP_MSG_TYPE_INIT = 1, + NVSP_MSG_TYPE_INIT_COMPLETE = 2, + + NVSP_VERSION_MSG_START = 100, + + /* Version 1 Messages */ + NVSP_MSG1_TYPE_SEND_NDIS_VER = NVSP_VERSION_MSG_START, + + NVSP_MSG1_TYPE_SEND_RECV_BUF, + NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, + NVSP_MSG1_TYPE_REVOKE_RECV_BUF, + + NVSP_MSG1_TYPE_SEND_SEND_BUF, + NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, + NVSP_MSG1_TYPE_REVOKE_SEND_BUF, + + NVSP_MSG1_TYPE_SEND_RNDIS_PKT, + NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, + + /* Version 2 messages */ + NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF, + NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF_COMP, + NVSP_MSG2_TYPE_REVOKE_CHIMNEY_DELEGATED_BUF, + + NVSP_MSG2_TYPE_RESUME_CHIMNEY_RX_INDICATION, + + NVSP_MSG2_TYPE_TERMINATE_CHIMNEY, + NVSP_MSG2_TYPE_TERMINATE_CHIMNEY_COMP, + + NVSP_MSG2_TYPE_INDICATE_CHIMNEY_EVENT, + + NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT, + NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT_COMP, + + NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ, + NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ_COMP, + + NVSP_MSG2_TYPE_ALLOC_RXBUF, + NVSP_MSG2_TYPE_ALLOC_RXBUF_COMP, + + NVSP_MSG2_TYPE_FREE_RXBUF, + + NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT, + NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT_COMP, + + NVSP_MSG2_TYPE_SEND_NDIS_CONFIG, + + NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE, + NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, + + NVSP_MSG2_MAX = NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, + + /* Version 4 messages */ + NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION, + NVSP_MSG4_TYPE_SWITCH_DATA_PATH, + NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, + + NVSP_MSG4_MAX = NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, + + /* Version 5 messages */ + NVSP_MSG5_TYPE_OID_QUERY_EX, + NVSP_MSG5_TYPE_OID_QUERY_EX_COMP, + NVSP_MSG5_TYPE_SUBCHANNEL, + NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, + + NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, +}; + +enum { + NVSP_STAT_NONE = 0, + NVSP_STAT_SUCCESS, + NVSP_STAT_FAIL, + NVSP_STAT_PROTOCOL_TOO_NEW, + NVSP_STAT_PROTOCOL_TOO_OLD, + NVSP_STAT_INVALID_RNDIS_PKT, + NVSP_STAT_BUSY, + NVSP_STAT_PROTOCOL_UNSUPPORTED, + NVSP_STAT_MAX, +}; + +struct nvsp_message_header { + u32 msg_type; +}; + +/* Init Messages */ + +/* + * This message is used by the VSC to initialize the channel after the channels + * has been opened. This message should never include anything other then + * versioning (i.e. this message will be the same for ever). + */ +struct nvsp_message_init { + u32 min_protocol_ver; + u32 max_protocol_ver; +} __packed; + +/* + * This message is used by the VSP to complete the initialization of the + * channel. This message should never include anything other then versioning + * (i.e. this message will be the same for ever). + */ +struct nvsp_message_init_complete { + u32 negotiated_protocol_ver; + u32 max_mdl_chain_len; + u32 status; +} __packed; + +union nvsp_message_init_uber { + struct nvsp_message_init init; + struct nvsp_message_init_complete init_complete; +} __packed; + +/* Version 1 Messages */ + +/* + * This message is used by the VSC to send the NDIS version to the VSP. The VSP + * can use this information when handling OIDs sent by the VSC. + */ +struct nvsp_1_message_send_ndis_version { + u32 ndis_major_ver; + u32 ndis_minor_ver; +} __packed; + +/* + * This message is used by the VSC to send a receive buffer to the VSP. The VSP + * can then use the receive buffer to send data to the VSC. + */ +struct nvsp_1_message_send_receive_buffer { + u32 gpadl_handle; + u16 id; +} __packed; + +struct nvsp_1_receive_buffer_section { + u32 offset; + u32 sub_alloc_size; + u32 num_sub_allocs; + u32 end_offset; +} __packed; + +/* + * This message is used by the VSP to acknowledge a receive buffer send by the + * VSC. This message must be sent by the VSP before the VSP uses the receive + * buffer. + */ +struct nvsp_1_message_send_receive_buffer_complete { + u32 status; + u32 num_sections; + + /* + * The receive buffer is split into two parts, a large suballocation + * section and a small suballocation section. These sections are then + * suballocated by a certain size. + */ + + /* + * For example, the following break up of the receive buffer has 6 + * large suballocations and 10 small suballocations. + */ + + /* + * | Large Section | | Small Section | + * ------------------------------------------------------------ + * | | | | | | | | | | | | | | | | | | + * | | + * LargeOffset SmallOffset + */ + + struct nvsp_1_receive_buffer_section sections[1]; +} __packed; + +/* + * This message is sent by the VSC to revoke the receive buffer. After the VSP + * completes this transaction, the vsp should never use the receive buffer + * again. + */ +struct nvsp_1_message_revoke_receive_buffer { + u16 id; +}; + +/* + * This message is used by the VSC to send a send buffer to the VSP. The VSC + * can then use the send buffer to send data to the VSP. + */ +struct nvsp_1_message_send_send_buffer { + u32 gpadl_handle; + u16 id; +} __packed; + +/* + * This message is used by the VSP to acknowledge a send buffer sent by the + * VSC. This message must be sent by the VSP before the VSP uses the sent + * buffer. + */ +struct nvsp_1_message_send_send_buffer_complete { + u32 status; + + /* + * The VSC gets to choose the size of the send buffer and the VSP gets + * to choose the sections size of the buffer. This was done to enable + * dynamic reconfigurations when the cost of GPA-direct buffers + * decreases. + */ + u32 section_size; +} __packed; + +/* + * This message is sent by the VSC to revoke the send buffer. After the VSP + * completes this transaction, the vsp should never use the send buffer again. + */ +struct nvsp_1_message_revoke_send_buffer { + u16 id; +}; + +/* + * This message is used by both the VSP and the VSC to send a RNDIS message to + * the opposite channel endpoint. + */ +struct nvsp_1_message_send_rndis_packet { + /* + * This field is specified by RNIDS. They assume there's two different + * channels of communication. However, the Network VSP only has one. + * Therefore, the channel travels with the RNDIS packet. + */ + u32 channel_type; + + /* + * This field is used to send part or all of the data through a send + * buffer. This values specifies an index into the send buffer. If the + * index is 0xFFFFFFFF, then the send buffer is not being used and all + * of the data was sent through other VMBus mechanisms. + */ + u32 send_buf_section_index; + u32 send_buf_section_size; +} __packed; + +/* + * This message is used by both the VSP and the VSC to complete a RNDIS message + * to the opposite channel endpoint. At this point, the initiator of this + * message cannot use any resources associated with the original RNDIS packet. + */ +struct nvsp_1_message_send_rndis_packet_complete { + u32 status; +}; + +union nvsp_1_message_uber { + struct nvsp_1_message_send_ndis_version send_ndis_ver; + + struct nvsp_1_message_send_receive_buffer send_recv_buf; + struct nvsp_1_message_send_receive_buffer_complete + send_recv_buf_complete; + struct nvsp_1_message_revoke_receive_buffer revoke_recv_buf; + + struct nvsp_1_message_send_send_buffer send_send_buf; + struct nvsp_1_message_send_send_buffer_complete send_send_buf_complete; + struct nvsp_1_message_revoke_send_buffer revoke_send_buf; + + struct nvsp_1_message_send_rndis_packet send_rndis_pkt; + struct nvsp_1_message_send_rndis_packet_complete + send_rndis_pkt_complete; +} __packed; + + +/* + * Network VSP protocol version 2 messages: + */ +struct nvsp_2_vsc_capability { + union { + u64 data; + struct { + u64 vmq:1; + u64 chimney:1; + u64 sriov:1; + u64 ieee8021q:1; + u64 correlation_id:1; + }; + }; +} __packed; + +struct nvsp_2_send_ndis_config { + u32 mtu; + u32 reserved; + struct nvsp_2_vsc_capability capability; +} __packed; + +/* Allocate receive buffer */ +struct nvsp_2_alloc_rxbuf { + /* Allocation ID to match the allocation request and response */ + u32 alloc_id; + + /* Length of the VM shared memory receive buffer that needs to + * be allocated + */ + u32 len; +} __packed; + +/* Allocate receive buffer complete */ +struct nvsp_2_alloc_rxbuf_comp { + /* The NDIS_STATUS code for buffer allocation */ + u32 status; + + u32 alloc_id; + + /* GPADL handle for the allocated receive buffer */ + u32 gpadl_handle; + + /* Receive buffer ID */ + u64 recv_buf_id; +} __packed; + +struct nvsp_2_free_rxbuf { + u64 recv_buf_id; +} __packed; + +union nvsp_2_message_uber { + struct nvsp_2_send_ndis_config send_ndis_config; + struct nvsp_2_alloc_rxbuf alloc_rxbuf; + struct nvsp_2_alloc_rxbuf_comp alloc_rxbuf_comp; + struct nvsp_2_free_rxbuf free_rxbuf; +} __packed; + +enum nvsp_subchannel_operation { + NVSP_SUBCHANNEL_NONE = 0, + NVSP_SUBCHANNEL_ALLOCATE, + NVSP_SUBCHANNEL_MAX +}; + +struct nvsp_5_subchannel_request { + u32 op; + u32 num_subchannels; +} __packed; + +struct nvsp_5_subchannel_complete { + u32 status; + u32 num_subchannels; /* Actual number of subchannels allocated */ +} __packed; + +struct nvsp_5_send_indirect_table { + /* The number of entries in the send indirection table */ + u32 count; + + /* The offset of the send indireciton table from top of this struct. + * The send indirection table tells which channel to put the send + * traffic on. Each entry is a channel number. + */ + u32 offset; +} __packed; + +union nvsp_5_message_uber { + struct nvsp_5_subchannel_request subchn_req; + struct nvsp_5_subchannel_complete subchn_comp; + struct nvsp_5_send_indirect_table send_table; +} __packed; + +union nvsp_all_messages { + union nvsp_message_init_uber init_msg; + union nvsp_1_message_uber v1_msg; + union nvsp_2_message_uber v2_msg; + union nvsp_5_message_uber v5_msg; +} __packed; + +/* ALL Messages */ +struct nvsp_message { + struct nvsp_message_header hdr; + union nvsp_all_messages msg; +} __packed; + + +#define NETVSC_MTU 65536 + +#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ +#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ +#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */ +#define NETVSC_INVALID_INDEX -1 + + +#define NETVSC_RECEIVE_BUFFER_ID 0xcafe +#define NETVSC_SEND_BUFFER_ID 0 + +#define NETVSC_PACKET_SIZE 4096 + +#define VRSS_SEND_TAB_SIZE 16 + +#define RNDIS_MAX_PKT_DEFAULT 8 +#define RNDIS_PKT_ALIGN_DEFAULT 8 + +struct multi_send_data { + spinlock_t lock; /* protect struct multi_send_data */ + struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ + u32 count; /* counter of batched packets */ +}; + +/* The context of the netvsc device */ +struct net_device_context { + /* point back to our device context */ + struct hv_device *device_ctx; + struct delayed_work dwork; + struct work_struct work; + u32 msg_enable; /* debug level */ +}; + +/* Per netvsc device */ +struct netvsc_device { + struct hv_device *dev; + + u32 nvsp_version; + + atomic_t num_outstanding_sends; + wait_queue_head_t wait_drain; + bool start_remove; + bool destroy; + + /* Receive buffer allocated by us but manages by NetVSP */ + void *recv_buf; + u32 recv_buf_size; + u32 recv_buf_gpadl_handle; + u32 recv_section_cnt; + struct nvsp_1_receive_buffer_section *recv_section; + + /* Send buffer allocated by us */ + void *send_buf; + u32 send_buf_size; + u32 send_buf_gpadl_handle; + u32 send_section_cnt; + u32 send_section_size; + unsigned long *send_section_map; + int map_words; + + /* Used for NetVSP initialization protocol */ + struct completion channel_init_wait; + struct nvsp_message channel_init_pkt; + + struct nvsp_message revoke_packet; + /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */ + + struct net_device *ndev; + + struct vmbus_channel *chn_table[NR_CPUS]; + u32 send_table[VRSS_SEND_TAB_SIZE]; + u32 max_chn; + u32 num_chn; + atomic_t queue_sends[NR_CPUS]; + + /* Holds rndis device info */ + void *extension; + + int ring_size; + + /* The primary channel callback buffer */ + unsigned char *cb_buffer; + /* The sub channel callback buffer */ + unsigned char *sub_cb_buf; + + struct multi_send_data msd[NR_CPUS]; + u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ + u32 pkt_align; /* alignment bytes, e.g. 8 */ + + /* The net device context */ + struct net_device_context *nd_ctx; +}; + +/* NdisInitialize message */ +struct rndis_initialize_request { + u32 req_id; + u32 major_ver; + u32 minor_ver; + u32 max_xfer_size; +}; + +/* Response to NdisInitialize */ +struct rndis_initialize_complete { + u32 req_id; + u32 status; + u32 major_ver; + u32 minor_ver; + u32 dev_flags; + u32 medium; + u32 max_pkt_per_msg; + u32 max_xfer_size; + u32 pkt_alignment_factor; + u32 af_list_offset; + u32 af_list_size; +}; + +/* Call manager devices only: Information about an address family */ +/* supported by the device is appended to the response to NdisInitialize. */ +struct rndis_co_address_family { + u32 address_family; + u32 major_ver; + u32 minor_ver; +}; + +/* NdisHalt message */ +struct rndis_halt_request { + u32 req_id; +}; + +/* NdisQueryRequest message */ +struct rndis_query_request { + u32 req_id; + u32 oid; + u32 info_buflen; + u32 info_buf_offset; + u32 dev_vc_handle; +}; + +/* Response to NdisQueryRequest */ +struct rndis_query_complete { + u32 req_id; + u32 status; + u32 info_buflen; + u32 info_buf_offset; +}; + +/* NdisSetRequest message */ +struct rndis_set_request { + u32 req_id; + u32 oid; + u32 info_buflen; + u32 info_buf_offset; + u32 dev_vc_handle; +}; + +/* Response to NdisSetRequest */ +struct rndis_set_complete { + u32 req_id; + u32 status; +}; + +/* NdisReset message */ +struct rndis_reset_request { + u32 reserved; +}; + +/* Response to NdisReset */ +struct rndis_reset_complete { + u32 status; + u32 addressing_reset; +}; + +/* NdisMIndicateStatus message */ +struct rndis_indicate_status { + u32 status; + u32 status_buflen; + u32 status_buf_offset; +}; + +/* Diagnostic information passed as the status buffer in */ +/* struct rndis_indicate_status messages signifying error conditions. */ +struct rndis_diagnostic_info { + u32 diag_status; + u32 error_offset; +}; + +/* NdisKeepAlive message */ +struct rndis_keepalive_request { + u32 req_id; +}; + +/* Response to NdisKeepAlive */ +struct rndis_keepalive_complete { + u32 req_id; + u32 status; +}; + +/* + * Data message. All Offset fields contain byte offsets from the beginning of + * struct rndis_packet. All Length fields are in bytes. VcHandle is set + * to 0 for connectionless data, otherwise it contains the VC handle. + */ +struct rndis_packet { + u32 data_offset; + u32 data_len; + u32 oob_data_offset; + u32 oob_data_len; + u32 num_oob_data_elements; + u32 per_pkt_info_offset; + u32 per_pkt_info_len; + u32 vc_handle; + u32 reserved; +}; + +/* Optional Out of Band data associated with a Data message. */ +struct rndis_oobd { + u32 size; + u32 type; + u32 class_info_offset; +}; + +/* Packet extension field contents associated with a Data message. */ +struct rndis_per_packet_info { + u32 size; + u32 type; + u32 ppi_offset; +}; + +enum ndis_per_pkt_info_type { + TCPIP_CHKSUM_PKTINFO, + IPSEC_PKTINFO, + TCP_LARGESEND_PKTINFO, + CLASSIFICATION_HANDLE_PKTINFO, + NDIS_RESERVED, + SG_LIST_PKTINFO, + IEEE_8021Q_INFO, + ORIGINAL_PKTINFO, + PACKET_CANCEL_ID, + NBL_HASH_VALUE = PACKET_CANCEL_ID, + ORIGINAL_NET_BUFLIST, + CACHED_NET_BUFLIST, + SHORT_PKT_PADINFO, + MAX_PER_PKT_INFO +}; + +struct ndis_pkt_8021q_info { + union { + struct { + u32 pri:3; /* User Priority */ + u32 cfi:1; /* Canonical Format ID */ + u32 vlanid:12; /* VLAN ID */ + u32 reserved:16; + }; + u32 value; + }; +}; + +struct ndis_oject_header { + u8 type; + u8 revision; + u16 size; +}; + +#define NDIS_OBJECT_TYPE_DEFAULT 0x80 +#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3 +#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 +#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 +#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 +#define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 +#define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 +#define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 +#define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 +#define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 +#define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 +#define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 + +#define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 +#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 +#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 + +#define VERSION_4_OFFLOAD_SIZE 22 +/* + * New offload OIDs for NDIS 6 + */ +#define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ +#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ +#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ +#define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ +#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ +#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ + +struct ndis_offload_params { + struct ndis_oject_header header; + u8 ip_v4_csum; + u8 tcp_ip_v4_csum; + u8 udp_ip_v4_csum; + u8 tcp_ip_v6_csum; + u8 udp_ip_v6_csum; + u8 lso_v1; + u8 ip_sec_v1; + u8 lso_v2_ipv4; + u8 lso_v2_ipv6; + u8 tcp_connection_ip_v4; + u8 tcp_connection_ip_v6; + u32 flags; + u8 ip_sec_v2; + u8 ip_sec_v2_ip_v4; + struct { + u8 rsc_ip_v4; + u8 rsc_ip_v6; + }; + struct { + u8 encapsulated_packet_task_offload; + u8 encapsulation_types; + }; +}; + +struct ndis_tcp_ip_checksum_info { + union { + struct { + u32 is_ipv4:1; + u32 is_ipv6:1; + u32 tcp_checksum:1; + u32 udp_checksum:1; + u32 ip_header_checksum:1; + u32 reserved:11; + u32 tcp_header_offset:10; + } transmit; + struct { + u32 tcp_checksum_failed:1; + u32 udp_checksum_failed:1; + u32 ip_checksum_failed:1; + u32 tcp_checksum_succeeded:1; + u32 udp_checksum_succeeded:1; + u32 ip_checksum_succeeded:1; + u32 loopback:1; + u32 tcp_checksum_value_invalid:1; + u32 ip_checksum_value_invalid:1; + } receive; + u32 value; + }; +}; + +struct ndis_tcp_lso_info { + union { + struct { + u32 unused:30; + u32 type:1; + u32 reserved2:1; + } transmit; + struct { + u32 mss:20; + u32 tcp_header_offset:10; + u32 type:1; + u32 reserved2:1; + } lso_v1_transmit; + struct { + u32 tcp_payload:30; + u32 type:1; + u32 reserved2:1; + } lso_v1_transmit_complete; + struct { + u32 mss:20; + u32 tcp_header_offset:10; + u32 type:1; + u32 ip_version:1; + } lso_v2_transmit; + struct { + u32 reserved:30; + u32 type:1; + u32 reserved2:1; + } lso_v2_transmit_complete; + u32 value; + }; +}; + +#define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_pkt_8021q_info)) + +#define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_tcp_ip_checksum_info)) + +#define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_tcp_lso_info)) + +#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(u32)) + +/* Total size of all PPI data */ +#define NDIS_ALL_PPI_SIZE (NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE + \ + NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE) + +/* Format of Information buffer passed in a SetRequest for the OID */ +/* OID_GEN_RNDIS_CONFIG_PARAMETER. */ +struct rndis_config_parameter_info { + u32 parameter_name_offset; + u32 parameter_name_length; + u32 parameter_type; + u32 parameter_value_offset; + u32 parameter_value_length; +}; + +/* Values for ParameterType in struct rndis_config_parameter_info */ +#define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 +#define RNDIS_CONFIG_PARAM_TYPE_STRING 2 + +/* CONDIS Miniport messages for connection oriented devices */ +/* that do not implement a call manager. */ + +/* CoNdisMiniportCreateVc message */ +struct rcondis_mp_create_vc { + u32 req_id; + u32 ndis_vc_handle; +}; + +/* Response to CoNdisMiniportCreateVc */ +struct rcondis_mp_create_vc_complete { + u32 req_id; + u32 dev_vc_handle; + u32 status; +}; + +/* CoNdisMiniportDeleteVc message */ +struct rcondis_mp_delete_vc { + u32 req_id; + u32 dev_vc_handle; +}; + +/* Response to CoNdisMiniportDeleteVc */ +struct rcondis_mp_delete_vc_complete { + u32 req_id; + u32 status; +}; + +/* CoNdisMiniportQueryRequest message */ +struct rcondis_mp_query_request { + u32 req_id; + u32 request_type; + u32 oid; + u32 dev_vc_handle; + u32 info_buflen; + u32 info_buf_offset; +}; + +/* CoNdisMiniportSetRequest message */ +struct rcondis_mp_set_request { + u32 req_id; + u32 request_type; + u32 oid; + u32 dev_vc_handle; + u32 info_buflen; + u32 info_buf_offset; +}; + +/* CoNdisIndicateStatus message */ +struct rcondis_indicate_status { + u32 ndis_vc_handle; + u32 status; + u32 status_buflen; + u32 status_buf_offset; +}; + +/* CONDIS Call/VC parameters */ +struct rcondis_specific_parameters { + u32 parameter_type; + u32 parameter_length; + u32 parameter_lffset; +}; + +struct rcondis_media_parameters { + u32 flags; + u32 reserved1; + u32 reserved2; + struct rcondis_specific_parameters media_specific; +}; + +struct rndis_flowspec { + u32 token_rate; + u32 token_bucket_size; + u32 peak_bandwidth; + u32 latency; + u32 delay_variation; + u32 service_type; + u32 max_sdu_size; + u32 minimum_policed_size; +}; + +struct rcondis_call_manager_parameters { + struct rndis_flowspec transmit; + struct rndis_flowspec receive; + struct rcondis_specific_parameters call_mgr_specific; +}; + +/* CoNdisMiniportActivateVc message */ +struct rcondis_mp_activate_vc_request { + u32 req_id; + u32 flags; + u32 dev_vc_handle; + u32 media_params_offset; + u32 media_params_length; + u32 call_mgr_params_offset; + u32 call_mgr_params_length; +}; + +/* Response to CoNdisMiniportActivateVc */ +struct rcondis_mp_activate_vc_complete { + u32 req_id; + u32 status; +}; + +/* CoNdisMiniportDeactivateVc message */ +struct rcondis_mp_deactivate_vc_request { + u32 req_id; + u32 flags; + u32 dev_vc_handle; +}; + +/* Response to CoNdisMiniportDeactivateVc */ +struct rcondis_mp_deactivate_vc_complete { + u32 req_id; + u32 status; +}; + + +/* union with all of the RNDIS messages */ +union rndis_message_container { + struct rndis_packet pkt; + struct rndis_initialize_request init_req; + struct rndis_halt_request halt_req; + struct rndis_query_request query_req; + struct rndis_set_request set_req; + struct rndis_reset_request reset_req; + struct rndis_keepalive_request keep_alive_req; + struct rndis_indicate_status indicate_status; + struct rndis_initialize_complete init_complete; + struct rndis_query_complete query_complete; + struct rndis_set_complete set_complete; + struct rndis_reset_complete reset_complete; + struct rndis_keepalive_complete keep_alive_complete; + struct rcondis_mp_create_vc co_miniport_create_vc; + struct rcondis_mp_delete_vc co_miniport_delete_vc; + struct rcondis_indicate_status co_indicate_status; + struct rcondis_mp_activate_vc_request co_miniport_activate_vc; + struct rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; + struct rcondis_mp_create_vc_complete co_miniport_create_vc_complete; + struct rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; + struct rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; + struct rcondis_mp_deactivate_vc_complete + co_miniport_deactivate_vc_complete; +}; + +/* Remote NDIS message format */ +struct rndis_message { + u32 ndis_msg_type; + + /* Total length of this message, from the beginning */ + /* of the sruct rndis_message, in bytes. */ + u32 msg_len; + + /* Actual message */ + union rndis_message_container msg; +}; + + +/* Handy macros */ + +/* get the size of an RNDIS message. Pass in the message type, */ +/* struct rndis_set_request, struct rndis_packet for example */ +#define RNDIS_MESSAGE_SIZE(msg) \ + (sizeof(msg) + (sizeof(struct rndis_message) - \ + sizeof(union rndis_message_container))) + +/* get pointer to info buffer with message pointer */ +#define MESSAGE_TO_INFO_BUFFER(msg) \ + (((unsigned char *)(msg)) + msg->info_buf_offset) + +/* get pointer to status buffer with message pointer */ +#define MESSAGE_TO_STATUS_BUFFER(msg) \ + (((unsigned char *)(msg)) + msg->status_buf_offset) + +/* get pointer to OOBD buffer with message pointer */ +#define MESSAGE_TO_OOBD_BUFFER(msg) \ + (((unsigned char *)(msg)) + msg->oob_data_offset) + +/* get pointer to data buffer with message pointer */ +#define MESSAGE_TO_DATA_BUFFER(msg) \ + (((unsigned char *)(msg)) + msg->per_pkt_info_offset) + +/* get pointer to contained message from NDIS_MESSAGE pointer */ +#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg) \ + ((void *) &rndis_msg->msg) + +/* get pointer to contained message from NDIS_MESSAGE pointer */ +#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg) \ + ((void *) rndis_msg) + + +#define __struct_bcount(x) + + + +#define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \ + sizeof(union rndis_message_container)) + +#define RNDIS_AND_PPI_SIZE (sizeof(struct rndis_message) + NDIS_ALL_PPI_SIZE) + +#define NDIS_PACKET_TYPE_DIRECTED 0x00000001 +#define NDIS_PACKET_TYPE_MULTICAST 0x00000002 +#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 +#define NDIS_PACKET_TYPE_BROADCAST 0x00000008 +#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 +#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 +#define NDIS_PACKET_TYPE_SMT 0x00000040 +#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 +#define NDIS_PACKET_TYPE_GROUP 0x00000100 +#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200 +#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 +#define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800 + +#define INFO_IPV4 2 +#define INFO_IPV6 4 +#define INFO_TCP 2 +#define INFO_UDP 4 + +#define TRANSPORT_INFO_NOT_IP 0 +#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP) +#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP) +#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP) +#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP) + + +#endif /* _HYPERV_NET_H */ diff --git a/kernel/drivers/net/hyperv/netvsc.c b/kernel/drivers/net/hyperv/netvsc.c new file mode 100644 index 000000000..ea091bc5f --- /dev/null +++ b/kernel/drivers/net/hyperv/netvsc.c @@ -0,0 +1,1231 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/if_ether.h> +#include <asm/sync_bitops.h> + +#include "hyperv_net.h" + + +static struct netvsc_device *alloc_net_device(struct hv_device *device) +{ + struct netvsc_device *net_device; + struct net_device *ndev = hv_get_drvdata(device); + int i; + + net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL); + if (!net_device) + return NULL; + + net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL); + if (!net_device->cb_buffer) { + kfree(net_device); + return NULL; + } + + init_waitqueue_head(&net_device->wait_drain); + net_device->start_remove = false; + net_device->destroy = false; + net_device->dev = device; + net_device->ndev = ndev; + net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; + net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; + + for (i = 0; i < num_online_cpus(); i++) + spin_lock_init(&net_device->msd[i].lock); + + hv_set_drvdata(device, net_device); + return net_device; +} + +static void free_netvsc_device(struct netvsc_device *nvdev) +{ + kfree(nvdev->cb_buffer); + kfree(nvdev); +} + +static struct netvsc_device *get_outbound_net_device(struct hv_device *device) +{ + struct netvsc_device *net_device; + + net_device = hv_get_drvdata(device); + if (net_device && net_device->destroy) + net_device = NULL; + + return net_device; +} + +static struct netvsc_device *get_inbound_net_device(struct hv_device *device) +{ + struct netvsc_device *net_device; + + net_device = hv_get_drvdata(device); + + if (!net_device) + goto get_in_err; + + if (net_device->destroy && + atomic_read(&net_device->num_outstanding_sends) == 0) + net_device = NULL; + +get_in_err: + return net_device; +} + + +static int netvsc_destroy_buf(struct netvsc_device *net_device) +{ + struct nvsp_message *revoke_packet; + int ret = 0; + struct net_device *ndev = net_device->ndev; + + /* + * If we got a section count, it means we received a + * SendReceiveBufferComplete msg (ie sent + * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need + * to send a revoke msg here + */ + if (net_device->recv_section_cnt) { + /* Send the revoke receive buffer */ + revoke_packet = &net_device->revoke_packet; + memset(revoke_packet, 0, sizeof(struct nvsp_message)); + + revoke_packet->hdr.msg_type = + NVSP_MSG1_TYPE_REVOKE_RECV_BUF; + revoke_packet->msg.v1_msg. + revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; + + ret = vmbus_sendpacket(net_device->dev->channel, + revoke_packet, + sizeof(struct nvsp_message), + (unsigned long)revoke_packet, + VM_PKT_DATA_INBAND, 0); + /* + * If we failed here, we might as well return and + * have a leak rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, "unable to send " + "revoke receive buffer to netvsp\n"); + return ret; + } + } + + /* Teardown the gpadl on the vsp end */ + if (net_device->recv_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(net_device->dev->channel, + net_device->recv_buf_gpadl_handle); + + /* If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, + "unable to teardown receive buffer's gpadl\n"); + return ret; + } + net_device->recv_buf_gpadl_handle = 0; + } + + if (net_device->recv_buf) { + /* Free up the receive buffer */ + vfree(net_device->recv_buf); + net_device->recv_buf = NULL; + } + + if (net_device->recv_section) { + net_device->recv_section_cnt = 0; + kfree(net_device->recv_section); + net_device->recv_section = NULL; + } + + /* Deal with the send buffer we may have setup. + * If we got a send section size, it means we received a + * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent + * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need + * to send a revoke msg here + */ + if (net_device->send_section_size) { + /* Send the revoke receive buffer */ + revoke_packet = &net_device->revoke_packet; + memset(revoke_packet, 0, sizeof(struct nvsp_message)); + + revoke_packet->hdr.msg_type = + NVSP_MSG1_TYPE_REVOKE_SEND_BUF; + revoke_packet->msg.v1_msg.revoke_send_buf.id = + NETVSC_SEND_BUFFER_ID; + + ret = vmbus_sendpacket(net_device->dev->channel, + revoke_packet, + sizeof(struct nvsp_message), + (unsigned long)revoke_packet, + VM_PKT_DATA_INBAND, 0); + /* If we failed here, we might as well return and + * have a leak rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, "unable to send " + "revoke send buffer to netvsp\n"); + return ret; + } + } + /* Teardown the gpadl on the vsp end */ + if (net_device->send_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(net_device->dev->channel, + net_device->send_buf_gpadl_handle); + + /* If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + netdev_err(ndev, + "unable to teardown send buffer's gpadl\n"); + return ret; + } + net_device->send_buf_gpadl_handle = 0; + } + if (net_device->send_buf) { + /* Free up the send buffer */ + vfree(net_device->send_buf); + net_device->send_buf = NULL; + } + kfree(net_device->send_section_map); + + return ret; +} + +static int netvsc_init_buf(struct hv_device *device) +{ + int ret = 0; + unsigned long t; + struct netvsc_device *net_device; + struct nvsp_message *init_packet; + struct net_device *ndev; + + net_device = get_outbound_net_device(device); + if (!net_device) + return -ENODEV; + ndev = net_device->ndev; + + net_device->recv_buf = vzalloc(net_device->recv_buf_size); + if (!net_device->recv_buf) { + netdev_err(ndev, "unable to allocate receive " + "buffer of size %d\n", net_device->recv_buf_size); + ret = -ENOMEM; + goto cleanup; + } + + /* + * Establish the gpadl handle for this buffer on this + * channel. Note: This call uses the vmbus connection rather + * than the channel to establish the gpadl handle. + */ + ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf, + net_device->recv_buf_size, + &net_device->recv_buf_gpadl_handle); + if (ret != 0) { + netdev_err(ndev, + "unable to establish receive buffer's gpadl\n"); + goto cleanup; + } + + + /* Notify the NetVsp of the gpadl handle */ + init_packet = &net_device->channel_init_pkt; + + memset(init_packet, 0, sizeof(struct nvsp_message)); + + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF; + init_packet->msg.v1_msg.send_recv_buf. + gpadl_handle = net_device->recv_buf_gpadl_handle; + init_packet->msg.v1_msg. + send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; + + /* Send the gpadl notification request */ + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret != 0) { + netdev_err(ndev, + "unable to send receive buffer's gpadl to netvsp\n"); + goto cleanup; + } + + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + BUG_ON(t == 0); + + + /* Check the response */ + if (init_packet->msg.v1_msg. + send_recv_buf_complete.status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, "Unable to complete receive buffer " + "initialization with NetVsp - status %d\n", + init_packet->msg.v1_msg. + send_recv_buf_complete.status); + ret = -EINVAL; + goto cleanup; + } + + /* Parse the response */ + + net_device->recv_section_cnt = init_packet->msg. + v1_msg.send_recv_buf_complete.num_sections; + + net_device->recv_section = kmemdup( + init_packet->msg.v1_msg.send_recv_buf_complete.sections, + net_device->recv_section_cnt * + sizeof(struct nvsp_1_receive_buffer_section), + GFP_KERNEL); + if (net_device->recv_section == NULL) { + ret = -EINVAL; + goto cleanup; + } + + /* + * For 1st release, there should only be 1 section that represents the + * entire receive buffer + */ + if (net_device->recv_section_cnt != 1 || + net_device->recv_section->offset != 0) { + ret = -EINVAL; + goto cleanup; + } + + /* Now setup the send buffer. + */ + net_device->send_buf = vzalloc(net_device->send_buf_size); + if (!net_device->send_buf) { + netdev_err(ndev, "unable to allocate send " + "buffer of size %d\n", net_device->send_buf_size); + ret = -ENOMEM; + goto cleanup; + } + + /* Establish the gpadl handle for this buffer on this + * channel. Note: This call uses the vmbus connection rather + * than the channel to establish the gpadl handle. + */ + ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, + net_device->send_buf_size, + &net_device->send_buf_gpadl_handle); + if (ret != 0) { + netdev_err(ndev, + "unable to establish send buffer's gpadl\n"); + goto cleanup; + } + + /* Notify the NetVsp of the gpadl handle */ + init_packet = &net_device->channel_init_pkt; + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; + init_packet->msg.v1_msg.send_send_buf.gpadl_handle = + net_device->send_buf_gpadl_handle; + init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID; + + /* Send the gpadl notification request */ + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret != 0) { + netdev_err(ndev, + "unable to send send buffer's gpadl to netvsp\n"); + goto cleanup; + } + + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + BUG_ON(t == 0); + + /* Check the response */ + if (init_packet->msg.v1_msg. + send_send_buf_complete.status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, "Unable to complete send buffer " + "initialization with NetVsp - status %d\n", + init_packet->msg.v1_msg. + send_send_buf_complete.status); + ret = -EINVAL; + goto cleanup; + } + + /* Parse the response */ + net_device->send_section_size = init_packet->msg. + v1_msg.send_send_buf_complete.section_size; + + /* Section count is simply the size divided by the section size. + */ + net_device->send_section_cnt = + net_device->send_buf_size/net_device->send_section_size; + + dev_info(&device->device, "Send section size: %d, Section count:%d\n", + net_device->send_section_size, net_device->send_section_cnt); + + /* Setup state for managing the send buffer. */ + net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, + BITS_PER_LONG); + + net_device->send_section_map = + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); + if (net_device->send_section_map == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + goto exit; + +cleanup: + netvsc_destroy_buf(net_device); + +exit: + return ret; +} + + +/* Negotiate NVSP protocol version */ +static int negotiate_nvsp_ver(struct hv_device *device, + struct netvsc_device *net_device, + struct nvsp_message *init_packet, + u32 nvsp_ver) +{ + int ret; + unsigned long t; + + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT; + init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver; + init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver; + + /* Send the init request */ + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + + if (ret != 0) + return ret; + + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + + if (t == 0) + return -ETIMEDOUT; + + if (init_packet->msg.init_msg.init_complete.status != + NVSP_STAT_SUCCESS) + return -EINVAL; + + if (nvsp_ver == NVSP_PROTOCOL_VERSION_1) + return 0; + + /* NVSPv2 only: Send NDIS config */ + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG; + init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu + + ETH_HLEN; + init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; + + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, 0); + + return ret; +} + +static int netvsc_connect_vsp(struct hv_device *device) +{ + int ret; + struct netvsc_device *net_device; + struct nvsp_message *init_packet; + int ndis_version; + struct net_device *ndev; + u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, + NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; + int i, num_ver = 4; /* number of different NVSP versions */ + + net_device = get_outbound_net_device(device); + if (!net_device) + return -ENODEV; + ndev = net_device->ndev; + + init_packet = &net_device->channel_init_pkt; + + /* Negotiate the latest NVSP protocol supported */ + for (i = num_ver - 1; i >= 0; i--) + if (negotiate_nvsp_ver(device, net_device, init_packet, + ver_list[i]) == 0) { + net_device->nvsp_version = ver_list[i]; + break; + } + + if (i < 0) { + ret = -EPROTO; + goto cleanup; + } + + pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version); + + /* Send the ndis version */ + memset(init_packet, 0, sizeof(struct nvsp_message)); + + if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4) + ndis_version = 0x00060001; + else + ndis_version = 0x0006001e; + + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER; + init_packet->msg.v1_msg. + send_ndis_ver.ndis_major_ver = + (ndis_version & 0xFFFF0000) >> 16; + init_packet->msg.v1_msg. + send_ndis_ver.ndis_minor_ver = + ndis_version & 0xFFFF; + + /* Send the init request */ + ret = vmbus_sendpacket(device->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, 0); + if (ret != 0) + goto cleanup; + + /* Post the big receive buffer to NetVSP */ + if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + + ret = netvsc_init_buf(device); + +cleanup: + return ret; +} + +static void netvsc_disconnect_vsp(struct netvsc_device *net_device) +{ + netvsc_destroy_buf(net_device); +} + +/* + * netvsc_device_remove - Callback when the root bus device is removed + */ +int netvsc_device_remove(struct hv_device *device) +{ + struct netvsc_device *net_device; + unsigned long flags; + + net_device = hv_get_drvdata(device); + + netvsc_disconnect_vsp(net_device); + + /* + * Since we have already drained, we don't need to busy wait + * as was done in final_release_stor_device() + * Note that we cannot set the ext pointer to NULL until + * we have drained - to drain the outgoing packets, we need to + * allow incoming packets. + */ + + spin_lock_irqsave(&device->channel->inbound_lock, flags); + hv_set_drvdata(device, NULL); + spin_unlock_irqrestore(&device->channel->inbound_lock, flags); + + /* + * At this point, no one should be accessing net_device + * except in here + */ + dev_notice(&device->device, "net device safe to remove\n"); + + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + + /* Release all resources */ + vfree(net_device->sub_cb_buf); + free_netvsc_device(net_device); + return 0; +} + + +#define RING_AVAIL_PERCENT_HIWATER 20 +#define RING_AVAIL_PERCENT_LOWATER 10 + +/* + * Get the percentage of available bytes to write in the ring. + * The return value is in range from 0 to 100. + */ +static inline u32 hv_ringbuf_avail_percent( + struct hv_ring_buffer_info *ring_info) +{ + u32 avail_read, avail_write; + + hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write); + + return avail_write * 100 / ring_info->ring_datasize; +} + +static inline void netvsc_free_send_slot(struct netvsc_device *net_device, + u32 index) +{ + sync_change_bit(index, net_device->send_section_map); +} + +static void netvsc_send_completion(struct netvsc_device *net_device, + struct hv_device *device, + struct vmpacket_descriptor *packet) +{ + struct nvsp_message *nvsp_packet; + struct hv_netvsc_packet *nvsc_packet; + struct net_device *ndev; + u32 send_index; + + ndev = net_device->ndev; + + nvsp_packet = (struct nvsp_message *)((unsigned long)packet + + (packet->offset8 << 3)); + + if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) || + (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || + (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) || + (nvsp_packet->hdr.msg_type == + NVSP_MSG5_TYPE_SUBCHANNEL)) { + /* Copy the response back */ + memcpy(&net_device->channel_init_pkt, nvsp_packet, + sizeof(struct nvsp_message)); + complete(&net_device->channel_init_wait); + } else if (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { + int num_outstanding_sends; + u16 q_idx = 0; + struct vmbus_channel *channel = device->channel; + int queue_sends; + + /* Get the send context */ + nvsc_packet = (struct hv_netvsc_packet *)(unsigned long) + packet->trans_id; + + /* Notify the layer above us */ + if (nvsc_packet) { + send_index = nvsc_packet->send_buf_index; + if (send_index != NETVSC_INVALID_INDEX) + netvsc_free_send_slot(net_device, send_index); + q_idx = nvsc_packet->q_idx; + channel = nvsc_packet->channel; + nvsc_packet->send_completion(nvsc_packet-> + send_completion_ctx); + } + + num_outstanding_sends = + atomic_dec_return(&net_device->num_outstanding_sends); + queue_sends = atomic_dec_return(&net_device-> + queue_sends[q_idx]); + + if (net_device->destroy && num_outstanding_sends == 0) + wake_up(&net_device->wait_drain); + + if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && + !net_device->start_remove && + (hv_ringbuf_avail_percent(&channel->outbound) > + RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) + netif_tx_wake_queue(netdev_get_tx_queue( + ndev, q_idx)); + } else { + netdev_err(ndev, "Unknown send completion packet type- " + "%d received!!\n", nvsp_packet->hdr.msg_type); + } + +} + +static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) +{ + unsigned long index; + u32 max_words = net_device->map_words; + unsigned long *map_addr = (unsigned long *)net_device->send_section_map; + u32 section_cnt = net_device->send_section_cnt; + int ret_val = NETVSC_INVALID_INDEX; + int i; + int prev_val; + + for (i = 0; i < max_words; i++) { + if (!~(map_addr[i])) + continue; + index = ffz(map_addr[i]); + prev_val = sync_test_and_set_bit(index, &map_addr[i]); + if (prev_val) + continue; + if ((index + (i * BITS_PER_LONG)) >= section_cnt) + break; + ret_val = (index + (i * BITS_PER_LONG)); + break; + } + return ret_val; +} + +static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, + unsigned int section_index, + u32 pend_size, + struct hv_netvsc_packet *packet) +{ + char *start = net_device->send_buf; + char *dest = start + (section_index * net_device->send_section_size) + + pend_size; + int i; + u32 msg_size = 0; + u32 padding = 0; + u32 remain = packet->total_data_buflen % net_device->pkt_align; + u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : + packet->page_buf_cnt; + + /* Add padding */ + if (packet->is_data_pkt && packet->xmit_more && remain && + !packet->cp_partial) { + padding = net_device->pkt_align - remain; + packet->rndis_msg->msg_len += padding; + packet->total_data_buflen += padding; + } + + for (i = 0; i < page_count; i++) { + char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); + u32 offset = packet->page_buf[i].offset; + u32 len = packet->page_buf[i].len; + + memcpy(dest, (src + offset), len); + msg_size += len; + dest += len; + } + + if (padding) { + memset(dest, 0, padding); + msg_size += padding; + } + + return msg_size; +} + +static inline int netvsc_send_pkt( + struct hv_netvsc_packet *packet, + struct netvsc_device *net_device) +{ + struct nvsp_message nvmsg; + struct vmbus_channel *out_channel = packet->channel; + u16 q_idx = packet->q_idx; + struct net_device *ndev = net_device->ndev; + u64 req_id; + int ret; + struct hv_page_buffer *pgbuf; + + nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; + if (packet->is_data_pkt) { + /* 0 is RMC_DATA; */ + nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0; + } else { + /* 1 is RMC_CONTROL; */ + nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1; + } + + nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index = + packet->send_buf_index; + if (packet->send_buf_index == NETVSC_INVALID_INDEX) + nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + else + nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = + packet->total_data_buflen; + + if (packet->send_completion) + req_id = (ulong)packet; + else + req_id = 0; + + if (out_channel->rescind) + return -ENODEV; + + if (packet->page_buf_cnt) { + pgbuf = packet->cp_partial ? packet->page_buf + + packet->rmsg_pgcnt : packet->page_buf; + ret = vmbus_sendpacket_pagebuffer(out_channel, + pgbuf, + packet->page_buf_cnt, + &nvmsg, + sizeof(struct nvsp_message), + req_id); + } else { + ret = vmbus_sendpacket( + out_channel, &nvmsg, + sizeof(struct nvsp_message), + req_id, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + } + + if (ret == 0) { + atomic_inc(&net_device->num_outstanding_sends); + atomic_inc(&net_device->queue_sends[q_idx]); + + if (hv_ringbuf_avail_percent(&out_channel->outbound) < + RING_AVAIL_PERCENT_LOWATER) { + netif_tx_stop_queue(netdev_get_tx_queue( + ndev, q_idx)); + + if (atomic_read(&net_device-> + queue_sends[q_idx]) < 1) + netif_tx_wake_queue(netdev_get_tx_queue( + ndev, q_idx)); + } + } else if (ret == -EAGAIN) { + netif_tx_stop_queue(netdev_get_tx_queue( + ndev, q_idx)); + if (atomic_read(&net_device->queue_sends[q_idx]) < 1) { + netif_tx_wake_queue(netdev_get_tx_queue( + ndev, q_idx)); + ret = -ENOSPC; + } + } else { + netdev_err(ndev, "Unable to send packet %p ret %d\n", + packet, ret); + } + + return ret; +} + +int netvsc_send(struct hv_device *device, + struct hv_netvsc_packet *packet) +{ + struct netvsc_device *net_device; + int ret = 0, m_ret = 0; + struct vmbus_channel *out_channel; + u16 q_idx = packet->q_idx; + u32 pktlen = packet->total_data_buflen, msd_len = 0; + unsigned int section_index = NETVSC_INVALID_INDEX; + unsigned long flag; + struct multi_send_data *msdp; + struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; + bool try_batch; + + net_device = get_outbound_net_device(device); + if (!net_device) + return -ENODEV; + + out_channel = net_device->chn_table[q_idx]; + if (!out_channel) { + out_channel = device->channel; + q_idx = 0; + packet->q_idx = 0; + } + packet->channel = out_channel; + packet->send_buf_index = NETVSC_INVALID_INDEX; + packet->cp_partial = false; + + msdp = &net_device->msd[q_idx]; + + /* batch packets in send buffer if possible */ + spin_lock_irqsave(&msdp->lock, flag); + if (msdp->pkt) + msd_len = msdp->pkt->total_data_buflen; + + try_batch = packet->is_data_pkt && msd_len > 0 && msdp->count < + net_device->max_pkt; + + if (try_batch && msd_len + pktlen + net_device->pkt_align < + net_device->send_section_size) { + section_index = msdp->pkt->send_buf_index; + + } else if (try_batch && msd_len + packet->rmsg_size < + net_device->send_section_size) { + section_index = msdp->pkt->send_buf_index; + packet->cp_partial = true; + + } else if (packet->is_data_pkt && pktlen + net_device->pkt_align < + net_device->send_section_size) { + section_index = netvsc_get_next_send_section(net_device); + if (section_index != NETVSC_INVALID_INDEX) { + msd_send = msdp->pkt; + msdp->pkt = NULL; + msdp->count = 0; + msd_len = 0; + } + } + + if (section_index != NETVSC_INVALID_INDEX) { + netvsc_copy_to_send_buf(net_device, + section_index, msd_len, + packet); + + packet->send_buf_index = section_index; + + if (packet->cp_partial) { + packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->total_data_buflen = msd_len + packet->rmsg_size; + } else { + packet->page_buf_cnt = 0; + packet->total_data_buflen += msd_len; + } + + if (msdp->pkt) + netvsc_xmit_completion(msdp->pkt); + + if (packet->xmit_more && !packet->cp_partial) { + msdp->pkt = packet; + msdp->count++; + } else { + cur_send = packet; + msdp->pkt = NULL; + msdp->count = 0; + } + } else { + msd_send = msdp->pkt; + msdp->pkt = NULL; + msdp->count = 0; + cur_send = packet; + } + + spin_unlock_irqrestore(&msdp->lock, flag); + + if (msd_send) { + m_ret = netvsc_send_pkt(msd_send, net_device); + + if (m_ret != 0) { + netvsc_free_send_slot(net_device, + msd_send->send_buf_index); + netvsc_xmit_completion(msd_send); + } + } + + if (cur_send) + ret = netvsc_send_pkt(cur_send, net_device); + + if (ret != 0 && section_index != NETVSC_INVALID_INDEX) + netvsc_free_send_slot(net_device, section_index); + + return ret; +} + +static void netvsc_send_recv_completion(struct hv_device *device, + struct vmbus_channel *channel, + struct netvsc_device *net_device, + u64 transaction_id, u32 status) +{ + struct nvsp_message recvcompMessage; + int retries = 0; + int ret; + struct net_device *ndev; + + ndev = net_device->ndev; + + recvcompMessage.hdr.msg_type = + NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE; + + recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status; + +retry_send_cmplt: + /* Send the completion */ + ret = vmbus_sendpacket(channel, &recvcompMessage, + sizeof(struct nvsp_message), transaction_id, + VM_PKT_COMP, 0); + if (ret == 0) { + /* success */ + /* no-op */ + } else if (ret == -EAGAIN) { + /* no more room...wait a bit and attempt to retry 3 times */ + retries++; + netdev_err(ndev, "unable to send receive completion pkt" + " (tid %llx)...retrying %d\n", transaction_id, retries); + + if (retries < 4) { + udelay(100); + goto retry_send_cmplt; + } else { + netdev_err(ndev, "unable to send receive " + "completion pkt (tid %llx)...give up retrying\n", + transaction_id); + } + } else { + netdev_err(ndev, "unable to send receive " + "completion pkt - %llx\n", transaction_id); + } +} + +static void netvsc_receive(struct netvsc_device *net_device, + struct vmbus_channel *channel, + struct hv_device *device, + struct vmpacket_descriptor *packet) +{ + struct vmtransfer_page_packet_header *vmxferpage_packet; + struct nvsp_message *nvsp_packet; + struct hv_netvsc_packet nv_pkt; + struct hv_netvsc_packet *netvsc_packet = &nv_pkt; + u32 status = NVSP_STAT_SUCCESS; + int i; + int count = 0; + struct net_device *ndev; + + ndev = net_device->ndev; + + /* + * All inbound packets other than send completion should be xfer page + * packet + */ + if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) { + netdev_err(ndev, "Unknown packet type received - %d\n", + packet->type); + return; + } + + nvsp_packet = (struct nvsp_message *)((unsigned long)packet + + (packet->offset8 << 3)); + + /* Make sure this is a valid nvsp packet */ + if (nvsp_packet->hdr.msg_type != + NVSP_MSG1_TYPE_SEND_RNDIS_PKT) { + netdev_err(ndev, "Unknown nvsp packet type received-" + " %d\n", nvsp_packet->hdr.msg_type); + return; + } + + vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet; + + if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) { + netdev_err(ndev, "Invalid xfer page set id - " + "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID, + vmxferpage_packet->xfer_pageset_id); + return; + } + + count = vmxferpage_packet->range_cnt; + netvsc_packet->channel = channel; + + /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ + for (i = 0; i < count; i++) { + /* Initialize the netvsc packet */ + netvsc_packet->status = NVSP_STAT_SUCCESS; + netvsc_packet->data = (void *)((unsigned long)net_device-> + recv_buf + vmxferpage_packet->ranges[i].byte_offset); + netvsc_packet->total_data_buflen = + vmxferpage_packet->ranges[i].byte_count; + + /* Pass it to the upper layer */ + rndis_filter_receive(device, netvsc_packet); + + if (netvsc_packet->status != NVSP_STAT_SUCCESS) + status = NVSP_STAT_FAIL; + } + + netvsc_send_recv_completion(device, channel, net_device, + vmxferpage_packet->d.trans_id, status); +} + + +static void netvsc_send_table(struct hv_device *hdev, + struct vmpacket_descriptor *vmpkt) +{ + struct netvsc_device *nvscdev; + struct net_device *ndev; + struct nvsp_message *nvmsg; + int i; + u32 count, *tab; + + nvscdev = get_outbound_net_device(hdev); + if (!nvscdev) + return; + ndev = nvscdev->ndev; + + nvmsg = (struct nvsp_message *)((unsigned long)vmpkt + + (vmpkt->offset8 << 3)); + + if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE) + return; + + count = nvmsg->msg.v5_msg.send_table.count; + if (count != VRSS_SEND_TAB_SIZE) { + netdev_err(ndev, "Received wrong send-table size:%u\n", count); + return; + } + + tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + + nvmsg->msg.v5_msg.send_table.offset); + + for (i = 0; i < count; i++) + nvscdev->send_table[i] = tab[i]; +} + +void netvsc_channel_cb(void *context) +{ + int ret; + struct vmbus_channel *channel = (struct vmbus_channel *)context; + struct hv_device *device; + struct netvsc_device *net_device; + u32 bytes_recvd; + u64 request_id; + struct vmpacket_descriptor *desc; + unsigned char *buffer; + int bufferlen = NETVSC_PACKET_SIZE; + struct net_device *ndev; + + if (channel->primary_channel != NULL) + device = channel->primary_channel->device_obj; + else + device = channel->device_obj; + + net_device = get_inbound_net_device(device); + if (!net_device) + return; + ndev = net_device->ndev; + buffer = get_per_channel_state(channel); + + do { + ret = vmbus_recvpacket_raw(channel, buffer, bufferlen, + &bytes_recvd, &request_id); + if (ret == 0) { + if (bytes_recvd > 0) { + desc = (struct vmpacket_descriptor *)buffer; + switch (desc->type) { + case VM_PKT_COMP: + netvsc_send_completion(net_device, + device, desc); + break; + + case VM_PKT_DATA_USING_XFER_PAGES: + netvsc_receive(net_device, channel, + device, desc); + break; + + case VM_PKT_DATA_INBAND: + netvsc_send_table(device, desc); + break; + + default: + netdev_err(ndev, + "unhandled packet type %d, " + "tid %llx len %d\n", + desc->type, request_id, + bytes_recvd); + break; + } + + } else { + /* + * We are done for this pass. + */ + break; + } + + } else if (ret == -ENOBUFS) { + if (bufferlen > NETVSC_PACKET_SIZE) + kfree(buffer); + /* Handle large packet */ + buffer = kmalloc(bytes_recvd, GFP_ATOMIC); + if (buffer == NULL) { + /* Try again next time around */ + netdev_err(ndev, + "unable to allocate buffer of size " + "(%d)!!\n", bytes_recvd); + break; + } + + bufferlen = bytes_recvd; + } + } while (1); + + if (bufferlen > NETVSC_PACKET_SIZE) + kfree(buffer); + return; +} + +/* + * netvsc_device_add - Callback when the device belonging to this + * driver is added + */ +int netvsc_device_add(struct hv_device *device, void *additional_info) +{ + int ret = 0; + int ring_size = + ((struct netvsc_device_info *)additional_info)->ring_size; + struct netvsc_device *net_device; + struct net_device *ndev; + + net_device = alloc_net_device(device); + if (!net_device) + return -ENOMEM; + + net_device->ring_size = ring_size; + + /* + * Coming into this function, struct net_device * is + * registered as the driver private data. + * In alloc_net_device(), we register struct netvsc_device * + * as the driver private data and stash away struct net_device * + * in struct netvsc_device *. + */ + ndev = net_device->ndev; + + /* Add netvsc_device context to netvsc_device */ + net_device->nd_ctx = netdev_priv(ndev); + + /* Initialize the NetVSC channel extension */ + init_completion(&net_device->channel_init_wait); + + set_per_channel_state(device->channel, net_device->cb_buffer); + + /* Open the channel */ + ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, + ring_size * PAGE_SIZE, NULL, 0, + netvsc_channel_cb, device->channel); + + if (ret != 0) { + netdev_err(ndev, "unable to open channel: %d\n", ret); + goto cleanup; + } + + /* Channel is opened */ + pr_info("hv_netvsc channel opened successfully\n"); + + net_device->chn_table[0] = device->channel; + + /* Connect with the NetVsp */ + ret = netvsc_connect_vsp(device); + if (ret != 0) { + netdev_err(ndev, + "unable to connect to NetVSP - %d\n", ret); + goto close; + } + + return ret; + +close: + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + +cleanup: + free_netvsc_device(net_device); + + return ret; +} diff --git a/kernel/drivers/net/hyperv/netvsc_drv.c b/kernel/drivers/net/hyperv/netvsc_drv.c new file mode 100644 index 000000000..5993c7e2d --- /dev/null +++ b/kernel/drivers/net/hyperv/netvsc_drv.c @@ -0,0 +1,1004 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/slab.h> +#include <net/arp.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/pkt_sched.h> + +#include "hyperv_net.h" + + +#define RING_SIZE_MIN 64 +static int ring_size = 128; +module_param(ring_size, int, S_IRUGO); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); + +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFUP | + NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR; + +static int debug = -1; +module_param(debug, int, S_IRUGO); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static void do_set_multicast(struct work_struct *w) +{ + struct net_device_context *ndevctx = + container_of(w, struct net_device_context, work); + struct netvsc_device *nvdev; + struct rndis_device *rdev; + + nvdev = hv_get_drvdata(ndevctx->device_ctx); + if (nvdev == NULL || nvdev->ndev == NULL) + return; + + rdev = nvdev->extension; + if (rdev == NULL) + return; + + if (nvdev->ndev->flags & IFF_PROMISC) + rndis_filter_set_packet_filter(rdev, + NDIS_PACKET_TYPE_PROMISCUOUS); + else + rndis_filter_set_packet_filter(rdev, + NDIS_PACKET_TYPE_BROADCAST | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_DIRECTED); +} + +static void netvsc_set_multicast_list(struct net_device *net) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + + schedule_work(&net_device_ctx->work); +} + +static int netvsc_open(struct net_device *net) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *device_obj = net_device_ctx->device_ctx; + struct netvsc_device *nvdev; + struct rndis_device *rdev; + int ret = 0; + + netif_carrier_off(net); + + /* Open up the device */ + ret = rndis_filter_open(device_obj); + if (ret != 0) { + netdev_err(net, "unable to open device (ret %d).\n", ret); + return ret; + } + + netif_tx_start_all_queues(net); + + nvdev = hv_get_drvdata(device_obj); + rdev = nvdev->extension; + if (!rdev->link_state) + netif_carrier_on(net); + + return ret; +} + +static int netvsc_close(struct net_device *net) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *device_obj = net_device_ctx->device_ctx; + int ret; + + netif_tx_disable(net); + + /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */ + cancel_work_sync(&net_device_ctx->work); + ret = rndis_filter_close(device_obj); + if (ret != 0) + netdev_err(net, "unable to close device (ret %d).\n", ret); + + return ret; +} + +static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, + int pkt_type) +{ + struct rndis_packet *rndis_pkt; + struct rndis_per_packet_info *ppi; + + rndis_pkt = &msg->msg.pkt; + rndis_pkt->data_offset += ppi_size; + + ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); + + ppi->size = ppi_size; + ppi->type = pkt_type; + ppi->ppi_offset = sizeof(struct rndis_per_packet_info); + + rndis_pkt->per_pkt_info_len += ppi_size; + + return ppi; +} + +union sub_key { + u64 k; + struct { + u8 pad[3]; + u8 kb; + u32 ka; + }; +}; + +/* Toeplitz hash function + * data: network byte order + * return: host byte order + */ +static u32 comp_hash(u8 *key, int klen, void *data, int dlen) +{ + union sub_key subk; + int k_next = 4; + u8 dt; + int i, j; + u32 ret = 0; + + subk.k = 0; + subk.ka = ntohl(*(u32 *)key); + + for (i = 0; i < dlen; i++) { + subk.kb = key[k_next]; + k_next = (k_next + 1) % klen; + dt = ((u8 *)data)[i]; + for (j = 0; j < 8; j++) { + if (dt & 0x80) + ret ^= subk.ka; + dt <<= 1; + subk.k <<= 1; + } + } + + return ret; +} + +static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) +{ + struct flow_keys flow; + int data_len; + + if (!skb_flow_dissect(skb, &flow) || + !(flow.n_proto == htons(ETH_P_IP) || + flow.n_proto == htons(ETH_P_IPV6))) + return false; + + if (flow.ip_proto == IPPROTO_TCP) + data_len = 12; + else + data_len = 8; + + *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len); + + return true; +} + +static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + struct net_device_context *net_device_ctx = netdev_priv(ndev); + struct hv_device *hdev = net_device_ctx->device_ctx; + struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev); + u32 hash; + u16 q_idx = 0; + + if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) + return 0; + + if (netvsc_set_hash(&hash, skb)) { + q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % + ndev->real_num_tx_queues; + skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); + } + + return q_idx; +} + +void netvsc_xmit_completion(void *context) +{ + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; + struct sk_buff *skb = (struct sk_buff *) + (unsigned long)packet->send_completion_tid; + + if (skb) + dev_kfree_skb_any(skb); +} + +static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, + struct hv_page_buffer *pb) +{ + int j = 0; + + /* Deal with compund pages by ignoring unused part + * of the page. + */ + page += (offset >> PAGE_SHIFT); + offset &= ~PAGE_MASK; + + while (len > 0) { + unsigned long bytes; + + bytes = PAGE_SIZE - offset; + if (bytes > len) + bytes = len; + pb[j].pfn = page_to_pfn(page); + pb[j].offset = offset; + pb[j].len = bytes; + + offset += bytes; + len -= bytes; + + if (offset == PAGE_SIZE && len) { + page++; + offset = 0; + j++; + } + } + + return j + 1; +} + +static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, + struct hv_netvsc_packet *packet) +{ + struct hv_page_buffer *pb = packet->page_buf; + u32 slots_used = 0; + char *data = skb->data; + int frags = skb_shinfo(skb)->nr_frags; + int i; + + /* The packet is laid out thus: + * 1. hdr: RNDIS header and PPI + * 2. skb linear data + * 3. skb fragment data + */ + if (hdr != NULL) + slots_used += fill_pg_buf(virt_to_page(hdr), + offset_in_page(hdr), + len, &pb[slots_used]); + + packet->rmsg_size = len; + packet->rmsg_pgcnt = slots_used; + + slots_used += fill_pg_buf(virt_to_page(data), + offset_in_page(data), + skb_headlen(skb), &pb[slots_used]); + + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + + slots_used += fill_pg_buf(skb_frag_page(frag), + frag->page_offset, + skb_frag_size(frag), &pb[slots_used]); + } + return slots_used; +} + +static int count_skb_frag_slots(struct sk_buff *skb) +{ + int i, frags = skb_shinfo(skb)->nr_frags; + int pages = 0; + + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + unsigned long size = skb_frag_size(frag); + unsigned long offset = frag->page_offset; + + /* Skip unused frames from start of page */ + offset &= ~PAGE_MASK; + pages += PFN_UP(offset + size); + } + return pages; +} + +static int netvsc_get_slots(struct sk_buff *skb) +{ + char *data = skb->data; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + int slots; + int frag_slots; + + slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); + frag_slots = count_skb_frag_slots(skb); + return slots + frag_slots; +} + +static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off) +{ + u32 ret_val = TRANSPORT_INFO_NOT_IP; + + if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) && + (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) { + goto not_ip; + } + + *trans_off = skb_transport_offset(skb); + + if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) { + struct iphdr *iphdr = ip_hdr(skb); + + if (iphdr->protocol == IPPROTO_TCP) + ret_val = TRANSPORT_INFO_IPV4_TCP; + else if (iphdr->protocol == IPPROTO_UDP) + ret_val = TRANSPORT_INFO_IPV4_UDP; + } else { + if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + ret_val = TRANSPORT_INFO_IPV6_TCP; + else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) + ret_val = TRANSPORT_INFO_IPV6_UDP; + } + +not_ip: + return ret_val; +} + +static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_netvsc_packet *packet = NULL; + int ret; + unsigned int num_data_pgs; + struct rndis_message *rndis_msg; + struct rndis_packet *rndis_pkt; + u32 rndis_msg_size; + bool isvlan; + bool linear = false; + struct rndis_per_packet_info *ppi; + struct ndis_tcp_ip_checksum_info *csum_info; + struct ndis_tcp_lso_info *lso_info; + int hdr_offset; + u32 net_trans_info; + u32 hash; + u32 skb_length; + u32 pkt_sz; + struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; + + + /* We will atmost need two pages to describe the rndis + * header. We can only transmit MAX_PAGE_BUFFER_COUNT number + * of pages in a single packet. If skb is scattered around + * more pages we try linearizing it. + */ + +check_size: + skb_length = skb->len; + num_data_pgs = netvsc_get_slots(skb) + 2; + if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) { + net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", + num_data_pgs, skb->len); + ret = -EFAULT; + goto drop; + } else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { + if (skb_linearize(skb)) { + net_alert_ratelimited("failed to linearize skb\n"); + ret = -ENOMEM; + goto drop; + } + linear = true; + goto check_size; + } + + pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE; + + ret = skb_cow_head(skb, pkt_sz); + if (ret) { + netdev_err(net, "unable to alloc hv_netvsc_packet\n"); + ret = -ENOMEM; + goto drop; + } + /* Use the headroom for building up the packet */ + packet = (struct hv_netvsc_packet *)skb->head; + + packet->status = 0; + packet->xmit_more = skb->xmit_more; + + packet->vlan_tci = skb->vlan_tci; + packet->page_buf = page_buf; + + packet->q_idx = skb_get_queue_mapping(skb); + + packet->is_data_pkt = true; + packet->total_data_buflen = skb->len; + + packet->rndis_msg = (struct rndis_message *)((unsigned long)packet + + sizeof(struct hv_netvsc_packet)); + + memset(packet->rndis_msg, 0, RNDIS_AND_PPI_SIZE); + + /* Set the completion routine */ + packet->send_completion = netvsc_xmit_completion; + packet->send_completion_ctx = packet; + packet->send_completion_tid = (unsigned long)skb; + + isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; + + /* Add the rndis header */ + rndis_msg = packet->rndis_msg; + rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; + rndis_msg->msg_len = packet->total_data_buflen; + rndis_pkt = &rndis_msg->msg.pkt; + rndis_pkt->data_offset = sizeof(struct rndis_packet); + rndis_pkt->data_len = packet->total_data_buflen; + rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); + + rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); + + hash = skb_get_hash_raw(skb); + if (hash != 0 && net->real_num_tx_queues > 1) { + rndis_msg_size += NDIS_HASH_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, + NBL_HASH_VALUE); + *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; + } + + if (isvlan) { + struct ndis_pkt_8021q_info *vlan; + + rndis_msg_size += NDIS_VLAN_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, + IEEE_8021Q_INFO); + vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + + ppi->ppi_offset); + vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK; + vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >> + VLAN_PRIO_SHIFT; + } + + net_trans_info = get_net_transport_info(skb, &hdr_offset); + if (net_trans_info == TRANSPORT_INFO_NOT_IP) + goto do_send; + + /* + * Setup the sendside checksum offload only if this is not a + * GSO packet. + */ + if (skb_is_gso(skb)) + goto do_lso; + + if ((skb->ip_summed == CHECKSUM_NONE) || + (skb->ip_summed == CHECKSUM_UNNECESSARY)) + goto do_send; + + rndis_msg_size += NDIS_CSUM_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, + TCPIP_CHKSUM_PKTINFO); + + csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + + ppi->ppi_offset); + + if (net_trans_info & (INFO_IPV4 << 16)) + csum_info->transmit.is_ipv4 = 1; + else + csum_info->transmit.is_ipv6 = 1; + + if (net_trans_info & INFO_TCP) { + csum_info->transmit.tcp_checksum = 1; + csum_info->transmit.tcp_header_offset = hdr_offset; + } else if (net_trans_info & INFO_UDP) { + /* UDP checksum offload is not supported on ws2008r2. + * Furthermore, on ws2012 and ws2012r2, there are some + * issues with udp checksum offload from Linux guests. + * (these are host issues). + * For now compute the checksum here. + */ + struct udphdr *uh; + u16 udp_len; + + ret = skb_cow_head(skb, 0); + if (ret) + goto drop; + + uh = udp_hdr(skb); + udp_len = ntohs(uh->len); + uh->check = 0; + uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + udp_len, IPPROTO_UDP, + csum_partial(uh, udp_len, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + csum_info->transmit.udp_checksum = 0; + } + goto do_send; + +do_lso: + rndis_msg_size += NDIS_LSO_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, + TCP_LARGESEND_PKTINFO); + + lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + + ppi->ppi_offset); + + lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; + if (net_trans_info & (INFO_IPV4 << 16)) { + lso_info->lso_v2_transmit.ip_version = + NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; + ip_hdr(skb)->tot_len = 0; + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + } else { + lso_info->lso_v2_transmit.ip_version = + NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + } + lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; + lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; + +do_send: + /* Start filling in the page buffers with the rndis hdr */ + rndis_msg->msg_len += rndis_msg_size; + packet->total_data_buflen = rndis_msg->msg_len; + packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, + skb, packet); + + ret = netvsc_send(net_device_ctx->device_ctx, packet); + +drop: + if (ret == 0) { + net->stats.tx_bytes += skb_length; + net->stats.tx_packets++; + } else { + if (ret != -EAGAIN) { + dev_kfree_skb_any(skb); + net->stats.tx_dropped++; + } + } + + return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK; +} + +/* + * netvsc_linkstatus_callback - Link up/down notification + */ +void netvsc_linkstatus_callback(struct hv_device *device_obj, + struct rndis_message *resp) +{ + struct rndis_indicate_status *indicate = &resp->msg.indicate_status; + struct net_device *net; + struct net_device_context *ndev_ctx; + struct netvsc_device *net_device; + struct rndis_device *rdev; + + net_device = hv_get_drvdata(device_obj); + rdev = net_device->extension; + + switch (indicate->status) { + case RNDIS_STATUS_MEDIA_CONNECT: + rdev->link_state = false; + break; + case RNDIS_STATUS_MEDIA_DISCONNECT: + rdev->link_state = true; + break; + case RNDIS_STATUS_NETWORK_CHANGE: + rdev->link_change = true; + break; + default: + return; + } + + net = net_device->ndev; + + if (!net || net->reg_state != NETREG_REGISTERED) + return; + + ndev_ctx = netdev_priv(net); + if (!rdev->link_state) { + schedule_delayed_work(&ndev_ctx->dwork, 0); + schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); + } else { + schedule_delayed_work(&ndev_ctx->dwork, 0); + } +} + +/* + * netvsc_recv_callback - Callback when we receive a packet from the + * "wire" on the specified device. + */ +int netvsc_recv_callback(struct hv_device *device_obj, + struct hv_netvsc_packet *packet, + struct ndis_tcp_ip_checksum_info *csum_info) +{ + struct net_device *net; + struct sk_buff *skb; + + net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; + if (!net || net->reg_state != NETREG_REGISTERED) { + packet->status = NVSP_STAT_FAIL; + return 0; + } + + /* Allocate a skb - TODO direct I/O to pages? */ + skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); + if (unlikely(!skb)) { + ++net->stats.rx_dropped; + packet->status = NVSP_STAT_FAIL; + return 0; + } + + /* + * Copy to skb. This copy is needed here since the memory pointed by + * hv_netvsc_packet cannot be deallocated + */ + memcpy(skb_put(skb, packet->total_data_buflen), packet->data, + packet->total_data_buflen); + + skb->protocol = eth_type_trans(skb, net); + if (csum_info) { + /* We only look at the IP checksum here. + * Should we be dropping the packet if checksum + * failed? How do we deal with other checksums - TCP/UDP? + */ + if (csum_info->receive.ip_checksum_succeeded) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + } + + if (packet->vlan_tci & VLAN_TAG_PRESENT) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + packet->vlan_tci); + + skb_record_rx_queue(skb, packet->channel-> + offermsg.offer.sub_channel_index); + + net->stats.rx_packets++; + net->stats.rx_bytes += packet->total_data_buflen; + + /* + * Pass the skb back up. Network stack will deallocate the skb when it + * is done. + * TODO - use NAPI? + */ + netif_rx(skb); + + return 0; +} + +static void netvsc_get_drvinfo(struct net_device *net, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); +} + +static void netvsc_get_channels(struct net_device *net, + struct ethtool_channels *channel) +{ + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *dev = net_device_ctx->device_ctx; + struct netvsc_device *nvdev = hv_get_drvdata(dev); + + if (nvdev) { + channel->max_combined = nvdev->max_chn; + channel->combined_count = nvdev->num_chn; + } +} + +static int netvsc_change_mtu(struct net_device *ndev, int mtu) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct hv_device *hdev = ndevctx->device_ctx; + struct netvsc_device *nvdev = hv_get_drvdata(hdev); + struct netvsc_device_info device_info; + int limit = ETH_DATA_LEN; + + if (nvdev == NULL || nvdev->destroy) + return -ENODEV; + + if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) + limit = NETVSC_MTU - ETH_HLEN; + + /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */ + if (mtu < ETH_DATA_LEN || mtu > limit) + return -EINVAL; + + nvdev->start_remove = true; + cancel_work_sync(&ndevctx->work); + netif_tx_disable(ndev); + rndis_filter_device_remove(hdev); + + ndev->mtu = mtu; + + ndevctx->device_ctx = hdev; + hv_set_drvdata(hdev, ndev); + device_info.ring_size = ring_size; + rndis_filter_device_add(hdev, &device_info); + netif_tx_wake_all_queues(ndev); + + return 0; +} + + +static int netvsc_set_mac_addr(struct net_device *ndev, void *p) +{ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct hv_device *hdev = ndevctx->device_ctx; + struct sockaddr *addr = p; + char save_adr[ETH_ALEN]; + unsigned char save_aatype; + int err; + + memcpy(save_adr, ndev->dev_addr, ETH_ALEN); + save_aatype = ndev->addr_assign_type; + + err = eth_mac_addr(ndev, p); + if (err != 0) + return err; + + err = rndis_filter_set_device_mac(hdev, addr->sa_data); + if (err != 0) { + /* roll back to saved MAC */ + memcpy(ndev->dev_addr, save_adr, ETH_ALEN); + ndev->addr_assign_type = save_aatype; + } + + return err; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void netvsc_poll_controller(struct net_device *net) +{ + /* As netvsc_start_xmit() works synchronous we don't have to + * trigger anything here. + */ +} +#endif + +static const struct ethtool_ops ethtool_ops = { + .get_drvinfo = netvsc_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_channels = netvsc_get_channels, +}; + +static const struct net_device_ops device_ops = { + .ndo_open = netvsc_open, + .ndo_stop = netvsc_close, + .ndo_start_xmit = netvsc_start_xmit, + .ndo_set_rx_mode = netvsc_set_multicast_list, + .ndo_change_mtu = netvsc_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = netvsc_set_mac_addr, + .ndo_select_queue = netvsc_select_queue, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = netvsc_poll_controller, +#endif +}; + +/* + * Send GARP packet to network peers after migrations. + * After Quick Migration, the network is not immediately operational in the + * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add + * another netif_notify_peers() into a delayed work, otherwise GARP packet + * will not be sent after quick migration, and cause network disconnection. + * Also, we update the carrier status here. + */ +static void netvsc_link_change(struct work_struct *w) +{ + struct net_device_context *ndev_ctx; + struct net_device *net; + struct netvsc_device *net_device; + struct rndis_device *rdev; + bool notify, refresh = false; + char *argv[] = { "/etc/init.d/network", "restart", NULL }; + char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; + + rtnl_lock(); + + ndev_ctx = container_of(w, struct net_device_context, dwork.work); + net_device = hv_get_drvdata(ndev_ctx->device_ctx); + rdev = net_device->extension; + net = net_device->ndev; + + if (rdev->link_state) { + netif_carrier_off(net); + notify = false; + } else { + netif_carrier_on(net); + notify = true; + if (rdev->link_change) { + rdev->link_change = false; + refresh = true; + } + } + + rtnl_unlock(); + + if (refresh) + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + + if (notify) + netdev_notify_peers(net); +} + + +static int netvsc_probe(struct hv_device *dev, + const struct hv_vmbus_device_id *dev_id) +{ + struct net_device *net = NULL; + struct net_device_context *net_device_ctx; + struct netvsc_device_info device_info; + struct netvsc_device *nvdev; + int ret; + u32 max_needed_headroom; + + net = alloc_etherdev_mq(sizeof(struct net_device_context), + num_online_cpus()); + if (!net) + return -ENOMEM; + + max_needed_headroom = sizeof(struct hv_netvsc_packet) + + RNDIS_AND_PPI_SIZE; + + netif_carrier_off(net); + + net_device_ctx = netdev_priv(net); + net_device_ctx->device_ctx = dev; + net_device_ctx->msg_enable = netif_msg_init(debug, default_msg); + if (netif_msg_probe(net_device_ctx)) + netdev_dbg(net, "netvsc msg_enable: %d\n", + net_device_ctx->msg_enable); + + hv_set_drvdata(dev, net); + INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); + INIT_WORK(&net_device_ctx->work, do_set_multicast); + + net->netdev_ops = &device_ops; + + net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_TSO; + net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_IP_CSUM | NETIF_F_TSO; + + net->ethtool_ops = ðtool_ops; + SET_NETDEV_DEV(net, &dev->device); + + /* + * Request additional head room in the skb. + * We will use this space to build the rndis + * heaser and other state we need to maintain. + */ + net->needed_headroom = max_needed_headroom; + + /* Notify the netvsc driver of the new device */ + device_info.ring_size = ring_size; + ret = rndis_filter_device_add(dev, &device_info); + if (ret != 0) { + netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); + free_netdev(net); + hv_set_drvdata(dev, NULL); + return ret; + } + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + + nvdev = hv_get_drvdata(dev); + netif_set_real_num_tx_queues(net, nvdev->num_chn); + netif_set_real_num_rx_queues(net, nvdev->num_chn); + + ret = register_netdev(net); + if (ret != 0) { + pr_err("Unable to register netdev.\n"); + rndis_filter_device_remove(dev); + free_netdev(net); + } else { + schedule_delayed_work(&net_device_ctx->dwork, 0); + } + + return ret; +} + +static int netvsc_remove(struct hv_device *dev) +{ + struct net_device *net; + struct net_device_context *ndev_ctx; + struct netvsc_device *net_device; + + net_device = hv_get_drvdata(dev); + net = net_device->ndev; + + if (net == NULL) { + dev_err(&dev->device, "No net device to remove\n"); + return 0; + } + + net_device->start_remove = true; + + ndev_ctx = netdev_priv(net); + cancel_delayed_work_sync(&ndev_ctx->dwork); + cancel_work_sync(&ndev_ctx->work); + + /* Stop outbound asap */ + netif_tx_disable(net); + + unregister_netdev(net); + + /* + * Call to the vsc driver to let it know that the device is being + * removed + */ + rndis_filter_device_remove(dev); + + free_netdev(net); + return 0; +} + +static const struct hv_vmbus_device_id id_table[] = { + /* Network guid */ + { HV_NIC_GUID, }, + { }, +}; + +MODULE_DEVICE_TABLE(vmbus, id_table); + +/* The one and only one */ +static struct hv_driver netvsc_drv = { + .name = KBUILD_MODNAME, + .id_table = id_table, + .probe = netvsc_probe, + .remove = netvsc_remove, +}; + +static void __exit netvsc_drv_exit(void) +{ + vmbus_driver_unregister(&netvsc_drv); +} + +static int __init netvsc_drv_init(void) +{ + if (ring_size < RING_SIZE_MIN) { + ring_size = RING_SIZE_MIN; + pr_info("Increased ring_size to %d (min allowed)\n", + ring_size); + } + return vmbus_driver_register(&netvsc_drv); +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Microsoft Hyper-V network driver"); + +module_init(netvsc_drv_init); +module_exit(netvsc_drv_exit); diff --git a/kernel/drivers/net/hyperv/rndis_filter.c b/kernel/drivers/net/hyperv/rndis_filter.c new file mode 100644 index 000000000..9118cea91 --- /dev/null +++ b/kernel/drivers/net/hyperv/rndis_filter.c @@ -0,0 +1,1192 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/highmem.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/if_ether.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/nls.h> + +#include "hyperv_net.h" + + +#define RNDIS_EXT_LEN PAGE_SIZE +struct rndis_request { + struct list_head list_ent; + struct completion wait_event; + + struct rndis_message response_msg; + /* + * The buffer for extended info after the RNDIS response message. It's + * referenced based on the data offset in the RNDIS message. Its size + * is enough for current needs, and should be sufficient for the near + * future. + */ + u8 response_ext[RNDIS_EXT_LEN]; + + /* Simplify allocation by having a netvsc packet inline */ + struct hv_netvsc_packet pkt; + + struct rndis_message request_msg; + /* + * The buffer for the extended info after the RNDIS request message. + * It is referenced and sized in a similar way as response_ext. + */ + u8 request_ext[RNDIS_EXT_LEN]; +}; + +static struct rndis_device *get_rndis_device(void) +{ + struct rndis_device *device; + + device = kzalloc(sizeof(struct rndis_device), GFP_KERNEL); + if (!device) + return NULL; + + spin_lock_init(&device->request_lock); + + INIT_LIST_HEAD(&device->req_list); + + device->state = RNDIS_DEV_UNINITIALIZED; + + return device; +} + +static struct rndis_request *get_rndis_request(struct rndis_device *dev, + u32 msg_type, + u32 msg_len) +{ + struct rndis_request *request; + struct rndis_message *rndis_msg; + struct rndis_set_request *set; + unsigned long flags; + + request = kzalloc(sizeof(struct rndis_request), GFP_KERNEL); + if (!request) + return NULL; + + init_completion(&request->wait_event); + + rndis_msg = &request->request_msg; + rndis_msg->ndis_msg_type = msg_type; + rndis_msg->msg_len = msg_len; + + request->pkt.q_idx = 0; + + /* + * Set the request id. This field is always after the rndis header for + * request/response packet types so we just used the SetRequest as a + * template + */ + set = &rndis_msg->msg.set_req; + set->req_id = atomic_inc_return(&dev->new_req_id); + + /* Add to the request list */ + spin_lock_irqsave(&dev->request_lock, flags); + list_add_tail(&request->list_ent, &dev->req_list); + spin_unlock_irqrestore(&dev->request_lock, flags); + + return request; +} + +static void put_rndis_request(struct rndis_device *dev, + struct rndis_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->request_lock, flags); + list_del(&req->list_ent); + spin_unlock_irqrestore(&dev->request_lock, flags); + + kfree(req); +} + +static void dump_rndis_message(struct hv_device *hv_dev, + struct rndis_message *rndis_msg) +{ + struct net_device *netdev; + struct netvsc_device *net_device; + + net_device = hv_get_drvdata(hv_dev); + netdev = net_device->ndev; + + switch (rndis_msg->ndis_msg_type) { + case RNDIS_MSG_PACKET: + netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " + "data offset %u data len %u, # oob %u, " + "oob offset %u, oob len %u, pkt offset %u, " + "pkt len %u\n", + rndis_msg->msg_len, + rndis_msg->msg.pkt.data_offset, + rndis_msg->msg.pkt.data_len, + rndis_msg->msg.pkt.num_oob_data_elements, + rndis_msg->msg.pkt.oob_data_offset, + rndis_msg->msg.pkt.oob_data_len, + rndis_msg->msg.pkt.per_pkt_info_offset, + rndis_msg->msg.pkt.per_pkt_info_len); + break; + + case RNDIS_MSG_INIT_C: + netdev_dbg(netdev, "RNDIS_MSG_INIT_C " + "(len %u, id 0x%x, status 0x%x, major %d, minor %d, " + "device flags %d, max xfer size 0x%x, max pkts %u, " + "pkt aligned %u)\n", + rndis_msg->msg_len, + rndis_msg->msg.init_complete.req_id, + rndis_msg->msg.init_complete.status, + rndis_msg->msg.init_complete.major_ver, + rndis_msg->msg.init_complete.minor_ver, + rndis_msg->msg.init_complete.dev_flags, + rndis_msg->msg.init_complete.max_xfer_size, + rndis_msg->msg.init_complete. + max_pkt_per_msg, + rndis_msg->msg.init_complete. + pkt_alignment_factor); + break; + + case RNDIS_MSG_QUERY_C: + netdev_dbg(netdev, "RNDIS_MSG_QUERY_C " + "(len %u, id 0x%x, status 0x%x, buf len %u, " + "buf offset %u)\n", + rndis_msg->msg_len, + rndis_msg->msg.query_complete.req_id, + rndis_msg->msg.query_complete.status, + rndis_msg->msg.query_complete. + info_buflen, + rndis_msg->msg.query_complete. + info_buf_offset); + break; + + case RNDIS_MSG_SET_C: + netdev_dbg(netdev, + "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", + rndis_msg->msg_len, + rndis_msg->msg.set_complete.req_id, + rndis_msg->msg.set_complete.status); + break; + + case RNDIS_MSG_INDICATE: + netdev_dbg(netdev, "RNDIS_MSG_INDICATE " + "(len %u, status 0x%x, buf len %u, buf offset %u)\n", + rndis_msg->msg_len, + rndis_msg->msg.indicate_status.status, + rndis_msg->msg.indicate_status.status_buflen, + rndis_msg->msg.indicate_status.status_buf_offset); + break; + + default: + netdev_dbg(netdev, "0x%x (len %u)\n", + rndis_msg->ndis_msg_type, + rndis_msg->msg_len); + break; + } +} + +static int rndis_filter_send_request(struct rndis_device *dev, + struct rndis_request *req) +{ + int ret; + struct hv_netvsc_packet *packet; + struct hv_page_buffer page_buf[2]; + + /* Setup the packet to send it */ + packet = &req->pkt; + + packet->is_data_pkt = false; + packet->total_data_buflen = req->request_msg.msg_len; + packet->page_buf_cnt = 1; + packet->page_buf = page_buf; + + packet->page_buf[0].pfn = virt_to_phys(&req->request_msg) >> + PAGE_SHIFT; + packet->page_buf[0].len = req->request_msg.msg_len; + packet->page_buf[0].offset = + (unsigned long)&req->request_msg & (PAGE_SIZE - 1); + + /* Add one page_buf when request_msg crossing page boundary */ + if (packet->page_buf[0].offset + packet->page_buf[0].len > PAGE_SIZE) { + packet->page_buf_cnt++; + packet->page_buf[0].len = PAGE_SIZE - + packet->page_buf[0].offset; + packet->page_buf[1].pfn = virt_to_phys((void *)&req->request_msg + + packet->page_buf[0].len) >> PAGE_SHIFT; + packet->page_buf[1].offset = 0; + packet->page_buf[1].len = req->request_msg.msg_len - + packet->page_buf[0].len; + } + + packet->send_completion = NULL; + packet->xmit_more = false; + + ret = netvsc_send(dev->net_dev->dev, packet); + return ret; +} + +static void rndis_set_link_state(struct rndis_device *rdev, + struct rndis_request *request) +{ + u32 link_status; + struct rndis_query_complete *query_complete; + + query_complete = &request->response_msg.msg.query_complete; + + if (query_complete->status == RNDIS_STATUS_SUCCESS && + query_complete->info_buflen == sizeof(u32)) { + memcpy(&link_status, (void *)((unsigned long)query_complete + + query_complete->info_buf_offset), sizeof(u32)); + rdev->link_state = link_status != 0; + } +} + +static void rndis_filter_receive_response(struct rndis_device *dev, + struct rndis_message *resp) +{ + struct rndis_request *request = NULL; + bool found = false; + unsigned long flags; + struct net_device *ndev; + + ndev = dev->net_dev->ndev; + + spin_lock_irqsave(&dev->request_lock, flags); + list_for_each_entry(request, &dev->req_list, list_ent) { + /* + * All request/response message contains RequestId as the 1st + * field + */ + if (request->request_msg.msg.init_req.req_id + == resp->msg.init_complete.req_id) { + found = true; + break; + } + } + spin_unlock_irqrestore(&dev->request_lock, flags); + + if (found) { + if (resp->msg_len <= + sizeof(struct rndis_message) + RNDIS_EXT_LEN) { + memcpy(&request->response_msg, resp, + resp->msg_len); + if (request->request_msg.ndis_msg_type == + RNDIS_MSG_QUERY && request->request_msg.msg. + query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) + rndis_set_link_state(dev, request); + } else { + netdev_err(ndev, + "rndis response buffer overflow " + "detected (size %u max %zu)\n", + resp->msg_len, + sizeof(struct rndis_message)); + + if (resp->ndis_msg_type == + RNDIS_MSG_RESET_C) { + /* does not have a request id field */ + request->response_msg.msg.reset_complete. + status = RNDIS_STATUS_BUFFER_OVERFLOW; + } else { + request->response_msg.msg. + init_complete.status = + RNDIS_STATUS_BUFFER_OVERFLOW; + } + } + + complete(&request->wait_event); + } else { + netdev_err(ndev, + "no rndis request found for this response " + "(id 0x%x res type 0x%x)\n", + resp->msg.init_complete.req_id, + resp->ndis_msg_type); + } +} + +/* + * Get the Per-Packet-Info with the specified type + * return NULL if not found. + */ +static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) +{ + struct rndis_per_packet_info *ppi; + int len; + + if (rpkt->per_pkt_info_offset == 0) + return NULL; + + ppi = (struct rndis_per_packet_info *)((ulong)rpkt + + rpkt->per_pkt_info_offset); + len = rpkt->per_pkt_info_len; + + while (len > 0) { + if (ppi->type == type) + return (void *)((ulong)ppi + ppi->ppi_offset); + len -= ppi->size; + ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size); + } + + return NULL; +} + +static void rndis_filter_receive_data(struct rndis_device *dev, + struct rndis_message *msg, + struct hv_netvsc_packet *pkt) +{ + struct rndis_packet *rndis_pkt; + u32 data_offset; + struct ndis_pkt_8021q_info *vlan; + struct ndis_tcp_ip_checksum_info *csum_info; + + rndis_pkt = &msg->msg.pkt; + + /* Remove the rndis header and pass it back up the stack */ + data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; + + pkt->total_data_buflen -= data_offset; + + /* + * Make sure we got a valid RNDIS message, now total_data_buflen + * should be the data packet size plus the trailer padding size + */ + if (pkt->total_data_buflen < rndis_pkt->data_len) { + netdev_err(dev->net_dev->ndev, "rndis message buffer " + "overflow detected (got %u, min %u)" + "...dropping this message!\n", + pkt->total_data_buflen, rndis_pkt->data_len); + return; + } + + /* + * Remove the rndis trailer padding from rndis packet message + * rndis_pkt->data_len tell us the real data length, we only copy + * the data packet to the stack, without the rndis trailer padding + */ + pkt->total_data_buflen = rndis_pkt->data_len; + pkt->data = (void *)((unsigned long)pkt->data + data_offset); + + vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); + if (vlan) { + pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | + (vlan->pri << VLAN_PRIO_SHIFT); + } else { + pkt->vlan_tci = 0; + } + + csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); + netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info); +} + +int rndis_filter_receive(struct hv_device *dev, + struct hv_netvsc_packet *pkt) +{ + struct netvsc_device *net_dev = hv_get_drvdata(dev); + struct rndis_device *rndis_dev; + struct rndis_message *rndis_msg; + struct net_device *ndev; + int ret = 0; + + if (!net_dev) { + ret = -EINVAL; + goto exit; + } + + ndev = net_dev->ndev; + + /* Make sure the rndis device state is initialized */ + if (!net_dev->extension) { + netdev_err(ndev, "got rndis message but no rndis device - " + "dropping this message!\n"); + ret = -ENODEV; + goto exit; + } + + rndis_dev = (struct rndis_device *)net_dev->extension; + if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { + netdev_err(ndev, "got rndis message but rndis device " + "uninitialized...dropping this message!\n"); + ret = -ENODEV; + goto exit; + } + + rndis_msg = pkt->data; + + if (netif_msg_rx_err(net_dev->nd_ctx)) + dump_rndis_message(dev, rndis_msg); + + switch (rndis_msg->ndis_msg_type) { + case RNDIS_MSG_PACKET: + /* data msg */ + rndis_filter_receive_data(rndis_dev, rndis_msg, pkt); + break; + + case RNDIS_MSG_INIT_C: + case RNDIS_MSG_QUERY_C: + case RNDIS_MSG_SET_C: + /* completion msgs */ + rndis_filter_receive_response(rndis_dev, rndis_msg); + break; + + case RNDIS_MSG_INDICATE: + /* notification msgs */ + netvsc_linkstatus_callback(dev, rndis_msg); + break; + default: + netdev_err(ndev, + "unhandled rndis message (type %u len %u)\n", + rndis_msg->ndis_msg_type, + rndis_msg->msg_len); + break; + } + +exit: + if (ret != 0) + pkt->status = NVSP_STAT_FAIL; + + return ret; +} + +static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, + void *result, u32 *result_size) +{ + struct rndis_request *request; + u32 inresult_size = *result_size; + struct rndis_query_request *query; + struct rndis_query_complete *query_complete; + int ret = 0; + unsigned long t; + + if (!result) + return -EINVAL; + + *result_size = 0; + request = get_rndis_request(dev, RNDIS_MSG_QUERY, + RNDIS_MESSAGE_SIZE(struct rndis_query_request)); + if (!request) { + ret = -ENOMEM; + goto cleanup; + } + + /* Setup the rndis query */ + query = &request->request_msg.msg.query_req; + query->oid = oid; + query->info_buf_offset = sizeof(struct rndis_query_request); + query->info_buflen = 0; + query->dev_vc_handle = 0; + + if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { + struct ndis_recv_scale_cap *cap; + + request->request_msg.msg_len += + sizeof(struct ndis_recv_scale_cap); + query->info_buflen = sizeof(struct ndis_recv_scale_cap); + cap = (struct ndis_recv_scale_cap *)((unsigned long)query + + query->info_buf_offset); + cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES; + cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; + cap->hdr.size = sizeof(struct ndis_recv_scale_cap); + } + + ret = rndis_filter_send_request(dev, request); + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + if (t == 0) { + ret = -ETIMEDOUT; + goto cleanup; + } + + /* Copy the response back */ + query_complete = &request->response_msg.msg.query_complete; + + if (query_complete->info_buflen > inresult_size) { + ret = -1; + goto cleanup; + } + + memcpy(result, + (void *)((unsigned long)query_complete + + query_complete->info_buf_offset), + query_complete->info_buflen); + + *result_size = query_complete->info_buflen; + +cleanup: + if (request) + put_rndis_request(dev, request); + + return ret; +} + +static int rndis_filter_query_device_mac(struct rndis_device *dev) +{ + u32 size = ETH_ALEN; + + return rndis_filter_query_device(dev, + RNDIS_OID_802_3_PERMANENT_ADDRESS, + dev->hw_mac_adr, &size); +} + +#define NWADR_STR "NetworkAddress" +#define NWADR_STRLEN 14 + +int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac) +{ + struct netvsc_device *nvdev = hv_get_drvdata(hdev); + struct rndis_device *rdev = nvdev->extension; + struct net_device *ndev = nvdev->ndev; + struct rndis_request *request; + struct rndis_set_request *set; + struct rndis_config_parameter_info *cpi; + wchar_t *cfg_nwadr, *cfg_mac; + struct rndis_set_complete *set_complete; + char macstr[2*ETH_ALEN+1]; + u32 extlen = sizeof(struct rndis_config_parameter_info) + + 2*NWADR_STRLEN + 4*ETH_ALEN; + int ret; + unsigned long t; + + request = get_rndis_request(rdev, RNDIS_MSG_SET, + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); + if (!request) + return -ENOMEM; + + set = &request->request_msg.msg.set_req; + set->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER; + set->info_buflen = extlen; + set->info_buf_offset = sizeof(struct rndis_set_request); + set->dev_vc_handle = 0; + + cpi = (struct rndis_config_parameter_info *)((ulong)set + + set->info_buf_offset); + cpi->parameter_name_offset = + sizeof(struct rndis_config_parameter_info); + /* Multiply by 2 because host needs 2 bytes (utf16) for each char */ + cpi->parameter_name_length = 2*NWADR_STRLEN; + cpi->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING; + cpi->parameter_value_offset = + cpi->parameter_name_offset + cpi->parameter_name_length; + /* Multiply by 4 because each MAC byte displayed as 2 utf16 chars */ + cpi->parameter_value_length = 4*ETH_ALEN; + + cfg_nwadr = (wchar_t *)((ulong)cpi + cpi->parameter_name_offset); + cfg_mac = (wchar_t *)((ulong)cpi + cpi->parameter_value_offset); + ret = utf8s_to_utf16s(NWADR_STR, NWADR_STRLEN, UTF16_HOST_ENDIAN, + cfg_nwadr, NWADR_STRLEN); + if (ret < 0) + goto cleanup; + snprintf(macstr, 2*ETH_ALEN+1, "%pm", mac); + ret = utf8s_to_utf16s(macstr, 2*ETH_ALEN, UTF16_HOST_ENDIAN, + cfg_mac, 2*ETH_ALEN); + if (ret < 0) + goto cleanup; + + ret = rndis_filter_send_request(rdev, request); + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + if (t == 0) { + netdev_err(ndev, "timeout before we got a set response...\n"); + /* + * can't put_rndis_request, since we may still receive a + * send-completion. + */ + return -EBUSY; + } else { + set_complete = &request->response_msg.msg.set_complete; + if (set_complete->status != RNDIS_STATUS_SUCCESS) { + netdev_err(ndev, "Fail to set MAC on host side:0x%x\n", + set_complete->status); + ret = -EINVAL; + } + } + +cleanup: + put_rndis_request(rdev, request); + return ret; +} + +static int +rndis_filter_set_offload_params(struct hv_device *hdev, + struct ndis_offload_params *req_offloads) +{ + struct netvsc_device *nvdev = hv_get_drvdata(hdev); + struct rndis_device *rdev = nvdev->extension; + struct net_device *ndev = nvdev->ndev; + struct rndis_request *request; + struct rndis_set_request *set; + struct ndis_offload_params *offload_params; + struct rndis_set_complete *set_complete; + u32 extlen = sizeof(struct ndis_offload_params); + int ret; + unsigned long t; + u32 vsp_version = nvdev->nvsp_version; + + if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { + extlen = VERSION_4_OFFLOAD_SIZE; + /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support + * UDP checksum offload. + */ + req_offloads->udp_ip_v4_csum = 0; + req_offloads->udp_ip_v6_csum = 0; + } + + request = get_rndis_request(rdev, RNDIS_MSG_SET, + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); + if (!request) + return -ENOMEM; + + set = &request->request_msg.msg.set_req; + set->oid = OID_TCP_OFFLOAD_PARAMETERS; + set->info_buflen = extlen; + set->info_buf_offset = sizeof(struct rndis_set_request); + set->dev_vc_handle = 0; + + offload_params = (struct ndis_offload_params *)((ulong)set + + set->info_buf_offset); + *offload_params = *req_offloads; + offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT; + offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3; + offload_params->header.size = extlen; + + ret = rndis_filter_send_request(rdev, request); + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + if (t == 0) { + netdev_err(ndev, "timeout before we got aOFFLOAD set response...\n"); + /* can't put_rndis_request, since we may still receive a + * send-completion. + */ + return -EBUSY; + } else { + set_complete = &request->response_msg.msg.set_complete; + if (set_complete->status != RNDIS_STATUS_SUCCESS) { + netdev_err(ndev, "Fail to set offload on host side:0x%x\n", + set_complete->status); + ret = -EINVAL; + } + } + +cleanup: + put_rndis_request(rdev, request); + return ret; +} + +u8 netvsc_hash_key[HASH_KEYLEN] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa +}; + +static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) +{ + struct net_device *ndev = rdev->net_dev->ndev; + struct rndis_request *request; + struct rndis_set_request *set; + struct rndis_set_complete *set_complete; + u32 extlen = sizeof(struct ndis_recv_scale_param) + + 4*ITAB_NUM + HASH_KEYLEN; + struct ndis_recv_scale_param *rssp; + u32 *itab; + u8 *keyp; + int i, ret; + unsigned long t; + + request = get_rndis_request( + rdev, RNDIS_MSG_SET, + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); + if (!request) + return -ENOMEM; + + set = &request->request_msg.msg.set_req; + set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS; + set->info_buflen = extlen; + set->info_buf_offset = sizeof(struct rndis_set_request); + set->dev_vc_handle = 0; + + rssp = (struct ndis_recv_scale_param *)(set + 1); + rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; + rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; + rssp->hdr.size = sizeof(struct ndis_recv_scale_param); + rssp->flag = 0; + rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | + NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 | + NDIS_HASH_TCP_IPV6; + rssp->indirect_tabsize = 4*ITAB_NUM; + rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); + rssp->hashkey_size = HASH_KEYLEN; + rssp->kashkey_offset = rssp->indirect_taboffset + + rssp->indirect_tabsize; + + /* Set indirection table entries */ + itab = (u32 *)(rssp + 1); + for (i = 0; i < ITAB_NUM; i++) + itab[i] = i % num_queue; + + /* Set hask key values */ + keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); + for (i = 0; i < HASH_KEYLEN; i++) + keyp[i] = netvsc_hash_key[i]; + + + ret = rndis_filter_send_request(rdev, request); + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + if (t == 0) { + netdev_err(ndev, "timeout before we got a set response...\n"); + /* can't put_rndis_request, since we may still receive a + * send-completion. + */ + return -ETIMEDOUT; + } else { + set_complete = &request->response_msg.msg.set_complete; + if (set_complete->status != RNDIS_STATUS_SUCCESS) { + netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", + set_complete->status); + ret = -EINVAL; + } + } + +cleanup: + put_rndis_request(rdev, request); + return ret; +} + + +static int rndis_filter_query_device_link_status(struct rndis_device *dev) +{ + u32 size = sizeof(u32); + u32 link_status; + int ret; + + ret = rndis_filter_query_device(dev, + RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, + &link_status, &size); + + return ret; +} + +int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter) +{ + struct rndis_request *request; + struct rndis_set_request *set; + struct rndis_set_complete *set_complete; + u32 status; + int ret; + unsigned long t; + struct net_device *ndev; + + ndev = dev->net_dev->ndev; + + request = get_rndis_request(dev, RNDIS_MSG_SET, + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + + sizeof(u32)); + if (!request) { + ret = -ENOMEM; + goto cleanup; + } + + /* Setup the rndis set */ + set = &request->request_msg.msg.set_req; + set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; + set->info_buflen = sizeof(u32); + set->info_buf_offset = sizeof(struct rndis_set_request); + + memcpy((void *)(unsigned long)set + sizeof(struct rndis_set_request), + &new_filter, sizeof(u32)); + + ret = rndis_filter_send_request(dev, request); + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + + if (t == 0) { + netdev_err(ndev, + "timeout before we got a set response...\n"); + ret = -ETIMEDOUT; + /* + * We can't deallocate the request since we may still receive a + * send completion for it. + */ + goto exit; + } else { + set_complete = &request->response_msg.msg.set_complete; + status = set_complete->status; + } + +cleanup: + if (request) + put_rndis_request(dev, request); +exit: + return ret; +} + + +static int rndis_filter_init_device(struct rndis_device *dev) +{ + struct rndis_request *request; + struct rndis_initialize_request *init; + struct rndis_initialize_complete *init_complete; + u32 status; + int ret; + unsigned long t; + struct netvsc_device *nvdev = dev->net_dev; + + request = get_rndis_request(dev, RNDIS_MSG_INIT, + RNDIS_MESSAGE_SIZE(struct rndis_initialize_request)); + if (!request) { + ret = -ENOMEM; + goto cleanup; + } + + /* Setup the rndis set */ + init = &request->request_msg.msg.init_req; + init->major_ver = RNDIS_MAJOR_VERSION; + init->minor_ver = RNDIS_MINOR_VERSION; + init->max_xfer_size = 0x4000; + + dev->state = RNDIS_DEV_INITIALIZING; + + ret = rndis_filter_send_request(dev, request); + if (ret != 0) { + dev->state = RNDIS_DEV_UNINITIALIZED; + goto cleanup; + } + + + t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + + if (t == 0) { + ret = -ETIMEDOUT; + goto cleanup; + } + + init_complete = &request->response_msg.msg.init_complete; + status = init_complete->status; + if (status == RNDIS_STATUS_SUCCESS) { + dev->state = RNDIS_DEV_INITIALIZED; + nvdev->max_pkt = init_complete->max_pkt_per_msg; + nvdev->pkt_align = 1 << init_complete->pkt_alignment_factor; + ret = 0; + } else { + dev->state = RNDIS_DEV_UNINITIALIZED; + ret = -EINVAL; + } + +cleanup: + if (request) + put_rndis_request(dev, request); + + return ret; +} + +static void rndis_filter_halt_device(struct rndis_device *dev) +{ + struct rndis_request *request; + struct rndis_halt_request *halt; + struct netvsc_device *nvdev = dev->net_dev; + struct hv_device *hdev = nvdev->dev; + ulong flags; + + /* Attempt to do a rndis device halt */ + request = get_rndis_request(dev, RNDIS_MSG_HALT, + RNDIS_MESSAGE_SIZE(struct rndis_halt_request)); + if (!request) + goto cleanup; + + /* Setup the rndis set */ + halt = &request->request_msg.msg.halt_req; + halt->req_id = atomic_inc_return(&dev->new_req_id); + + /* Ignore return since this msg is optional. */ + rndis_filter_send_request(dev, request); + + dev->state = RNDIS_DEV_UNINITIALIZED; + +cleanup: + spin_lock_irqsave(&hdev->channel->inbound_lock, flags); + nvdev->destroy = true; + spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags); + + /* Wait for all send completions */ + wait_event(nvdev->wait_drain, + atomic_read(&nvdev->num_outstanding_sends) == 0); + + if (request) + put_rndis_request(dev, request); + return; +} + +static int rndis_filter_open_device(struct rndis_device *dev) +{ + int ret; + + if (dev->state != RNDIS_DEV_INITIALIZED) + return 0; + + ret = rndis_filter_set_packet_filter(dev, + NDIS_PACKET_TYPE_BROADCAST | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_DIRECTED); + if (ret == 0) + dev->state = RNDIS_DEV_DATAINITIALIZED; + + return ret; +} + +static int rndis_filter_close_device(struct rndis_device *dev) +{ + int ret; + + if (dev->state != RNDIS_DEV_DATAINITIALIZED) + return 0; + + ret = rndis_filter_set_packet_filter(dev, 0); + if (ret == -ENODEV) + ret = 0; + + if (ret == 0) + dev->state = RNDIS_DEV_INITIALIZED; + + return ret; +} + +static void netvsc_sc_open(struct vmbus_channel *new_sc) +{ + struct netvsc_device *nvscdev; + u16 chn_index = new_sc->offermsg.offer.sub_channel_index; + int ret; + + nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); + + if (chn_index >= nvscdev->num_chn) + return; + + set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * + NETVSC_PACKET_SIZE); + + ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, + nvscdev->ring_size * PAGE_SIZE, NULL, 0, + netvsc_channel_cb, new_sc); + + if (ret == 0) + nvscdev->chn_table[chn_index] = new_sc; +} + +int rndis_filter_device_add(struct hv_device *dev, + void *additional_info) +{ + int ret; + struct netvsc_device *net_device; + struct rndis_device *rndis_device; + struct netvsc_device_info *device_info = additional_info; + struct ndis_offload_params offloads; + struct nvsp_message *init_packet; + unsigned long t; + struct ndis_recv_scale_cap rsscap; + u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + u32 mtu, size; + + rndis_device = get_rndis_device(); + if (!rndis_device) + return -ENODEV; + + /* + * Let the inner driver handle this first to create the netvsc channel + * NOTE! Once the channel is created, we may get a receive callback + * (RndisFilterOnReceive()) before this call is completed + */ + ret = netvsc_device_add(dev, additional_info); + if (ret != 0) { + kfree(rndis_device); + return ret; + } + + + /* Initialize the rndis device */ + net_device = hv_get_drvdata(dev); + net_device->max_chn = 1; + net_device->num_chn = 1; + + net_device->extension = rndis_device; + rndis_device->net_dev = net_device; + + /* Send the rndis initialization message */ + ret = rndis_filter_init_device(rndis_device); + if (ret != 0) { + rndis_filter_device_remove(dev); + return ret; + } + + /* Get the MTU from the host */ + size = sizeof(u32); + ret = rndis_filter_query_device(rndis_device, + RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, + &mtu, &size); + if (ret == 0 && size == sizeof(u32)) + net_device->ndev->mtu = mtu; + + /* Get the mac address */ + ret = rndis_filter_query_device_mac(rndis_device); + if (ret != 0) { + rndis_filter_device_remove(dev); + return ret; + } + + memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); + + /* Turn on the offloads; the host supports all of the relevant + * offloads. + */ + memset(&offloads, 0, sizeof(struct ndis_offload_params)); + /* A value of zero means "no change"; now turn on what we + * want. + */ + offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; + + + ret = rndis_filter_set_offload_params(dev, &offloads); + if (ret) + goto err_dev_remv; + + rndis_filter_query_device_link_status(rndis_device); + + device_info->link_state = rndis_device->link_state; + + dev_info(&dev->device, "Device MAC %pM link state %s\n", + rndis_device->hw_mac_adr, + device_info->link_state ? "down" : "up"); + + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) + return 0; + + /* vRSS setup */ + memset(&rsscap, 0, rsscap_size); + ret = rndis_filter_query_device(rndis_device, + OID_GEN_RECEIVE_SCALE_CAPABILITIES, + &rsscap, &rsscap_size); + if (ret || rsscap.num_recv_que < 2) + goto out; + + net_device->max_chn = rsscap.num_recv_que; + net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ? + num_online_cpus() : rsscap.num_recv_que; + if (net_device->num_chn == 1) + goto out; + + net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) * + NETVSC_PACKET_SIZE); + if (!net_device->sub_cb_buf) { + net_device->num_chn = 1; + dev_info(&dev->device, "No memory for subchannels.\n"); + goto out; + } + + vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); + + init_packet = &net_device->channel_init_pkt; + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; + init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; + init_packet->msg.v5_msg.subchn_req.num_subchannels = + net_device->num_chn - 1; + ret = vmbus_sendpacket(dev->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) + goto out; + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + if (t == 0) { + ret = -ETIMEDOUT; + goto out; + } + if (init_packet->msg.v5_msg.subchn_comp.status != + NVSP_STAT_SUCCESS) { + ret = -ENODEV; + goto out; + } + net_device->num_chn = 1 + + init_packet->msg.v5_msg.subchn_comp.num_subchannels; + + ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn); + +out: + if (ret) { + net_device->max_chn = 1; + net_device->num_chn = 1; + } + return 0; /* return 0 because primary channel can be used alone */ + +err_dev_remv: + rndis_filter_device_remove(dev); + return ret; +} + +void rndis_filter_device_remove(struct hv_device *dev) +{ + struct netvsc_device *net_dev = hv_get_drvdata(dev); + struct rndis_device *rndis_dev = net_dev->extension; + + /* Halt and release the rndis device */ + rndis_filter_halt_device(rndis_dev); + + kfree(rndis_dev); + net_dev->extension = NULL; + + netvsc_device_remove(dev); +} + + +int rndis_filter_open(struct hv_device *dev) +{ + struct netvsc_device *net_device = hv_get_drvdata(dev); + + if (!net_device) + return -EINVAL; + + return rndis_filter_open_device(net_device->extension); +} + +int rndis_filter_close(struct hv_device *dev) +{ + struct netvsc_device *nvdev = hv_get_drvdata(dev); + + if (!nvdev) + return -EINVAL; + + return rndis_filter_close_device(nvdev->extension); +} |