summary | refs | log | tree | commit | diff | stats
path: root/kernel/drivers/hv/channel_mgmt.c
diff options
context:
space:
mode:
author José Pekkarinen <jose.pekkarinen@nokia.com> 2016-04-11 10:41:07 +0300
committer José Pekkarinen <jose.pekkarinen@nokia.com> 2016-04-13 08:17:18 +0300
commit e09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
tree d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/hv/channel_mgmt.c
parent f93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and the rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic (I70131fb85). Collisions have been removed because its logic was already present in the source. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/hv/channel_mgmt.c')
-rw-r--r-- kernel/drivers/hv/channel_mgmt.c 195
1 files changed, 147 insertions, 48 deletions
diff --git a/kernel/drivers/hv/channel_mgmt.c b/kernel/drivers/hv/channel_mgmt.c
index 0eeb1b3bc..652afd11a 100644
--- a/kernel/drivers/hv/channel_mgmt.c
+++ b/kernel/drivers/hv/channel_mgmt.c
@@ -32,6 +32,9 @@
#include "hyperv_vmbus.h"
+static void init_vp_index(struct vmbus_channel *channel,
+ const uuid_le *type_guid);
+
/**
* vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
* @icmsghdrp: Pointer to msg header structure
@@ -201,22 +204,38 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
list_del(&channel->listentry);
spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+
+ primary_channel = channel;
} else {
primary_channel = channel->primary_channel;
spin_lock_irqsave(&primary_channel->lock, flags);
list_del(&channel->sc_list);
+ primary_channel->num_sc--;
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
+
+ /*
+ * We need to free the bit for init_vp_index() to work in the case
+ * of sub-channel, when we reload drivers like hv_netvsc.
+ */
+ cpumask_clear_cpu(channel->target_cpu,
+ &primary_channel->alloced_cpus_in_node);
+
free_channel(channel);
}
void vmbus_free_channels(void)
{
- struct vmbus_channel *channel;
+ struct vmbus_channel *channel, *tmp;
+
+ list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
+ listentry) {
+ /* if we don't set rescind to true, vmbus_close_internal()
+ * won't invoke hv_process_channel_removal().
+ */
+ channel->rescind = true;
- list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
vmbus_device_unregister(channel->device_obj);
- free_channel(channel);
}
}
@@ -228,7 +247,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
struct vmbus_channel *channel;
bool fnew = true;
- bool enq = false;
unsigned long flags;
/* Make sure this is a new offer */
@@ -244,25 +262,12 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
}
}
- if (fnew) {
+ if (fnew)
list_add_tail(&newchannel->listentry,
&vmbus_connection.chn_list);
- enq = true;
- }
spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
- if (enq) {
- if (newchannel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(newchannel->target_cpu,
- percpu_channel_enq,
- newchannel, true);
- } else {
- percpu_channel_enq(newchannel);
- put_cpu();
- }
- }
if (!fnew) {
/*
* Check to see if this is a sub-channel.
@@ -274,27 +279,22 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
newchannel->primary_channel = channel;
spin_lock_irqsave(&channel->lock, flags);
list_add_tail(&newchannel->sc_list, &channel->sc_list);
- spin_unlock_irqrestore(&channel->lock, flags);
-
- if (newchannel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(newchannel->target_cpu,
- percpu_channel_enq,
- newchannel, true);
- } else {
- percpu_channel_enq(newchannel);
- put_cpu();
- }
-
- newchannel->state = CHANNEL_OPEN_STATE;
channel->num_sc++;
- if (channel->sc_creation_callback != NULL)
- channel->sc_creation_callback(newchannel);
+ spin_unlock_irqrestore(&channel->lock, flags);
+ } else
+ goto err_free_chan;
+ }
- return;
- }
+ init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);
- goto err_free_chan;
+ if (newchannel->target_cpu != get_cpu()) {
+ put_cpu();
+ smp_call_function_single(newchannel->target_cpu,
+ percpu_channel_enq,
+ newchannel, true);
+ } else {
+ percpu_channel_enq(newchannel);
+ put_cpu();
}
/*
@@ -304,6 +304,12 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
*/
newchannel->state = CHANNEL_OPEN_STATE;
+ if (!fnew) {
+ if (channel->sc_creation_callback != NULL)
+ channel->sc_creation_callback(newchannel);
+ return;
+ }
+
/*
* Start the process of binding this offer to the driver
* We need to set the DeviceObject field before calling
@@ -351,6 +357,7 @@ enum {
IDE = 0,
SCSI,
NIC,
+ ND_NIC,
MAX_PERF_CHN,
};
@@ -374,23 +381,28 @@ static const struct hv_vmbus_device_id hp_devs[] = {
/*
* We use this state to statically distribute the channel interrupt load.
*/
-static u32 next_vp;
+static int next_numa_node_id;
/*
* Starting with Win8, we can statically distribute the incoming
- * channel interrupt load by binding a channel to VCPU. We
- * implement here a simple round robin scheme for distributing
- * the interrupt load.
- * We will bind channels that are not performance critical to cpu 0 and
- * performance critical channels (IDE, SCSI and Network) will be uniformly
- * distributed across all available CPUs.
+ * channel interrupt load by binding a channel to VCPU.
+ * We do this in a hierarchical fashion:
+ * First distribute the primary channels across available NUMA nodes
+ * and then distribute the subchannels amongst the CPUs in the NUMA
+ * node assigned to the primary channel.
+ *
+ * For pre-win8 hosts or non-performance critical channels we assign the
+ * first CPU in the first NUMA node.
*/
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
{
u32 cur_cpu;
int i;
bool perf_chn = false;
- u32 max_cpus = num_online_cpus();
+ struct vmbus_channel *primary = channel->primary_channel;
+ int next_node;
+ struct cpumask available_mask;
+ struct cpumask *alloced_mask;
for (i = IDE; i < MAX_PERF_CHN; i++) {
if (!memcmp(type_guid->b, hp_devs[i].guid,
@@ -407,16 +419,104 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui
* Also if the channel is not a performance critical
* channel, bind it to cpu 0.
*/
+ channel->numa_node = 0;
channel->target_cpu = 0;
- channel->target_vp = 0;
+ channel->target_vp = hv_context.vp_index[0];
return;
}
- cur_cpu = (++next_vp % max_cpus);
+
+ /*
+ * We distribute primary channels evenly across all the available
+ * NUMA nodes and within the assigned NUMA node we will assign the
+ * first available CPU to the primary channel.
+ * The sub-channels will be assigned to the CPUs available in the
+ * NUMA node evenly.
+ */
+ if (!primary) {
+ while (true) {
+ next_node = next_numa_node_id++;
+ if (next_node == nr_node_ids)
+ next_node = next_numa_node_id = 0;
+ if (cpumask_empty(cpumask_of_node(next_node)))
+ continue;
+ break;
+ }
+ channel->numa_node = next_node;
+ primary = channel;
+ }
+ alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
+
+ if (cpumask_weight(alloced_mask) ==
+ cpumask_weight(cpumask_of_node(primary->numa_node))) {
+ /*
+ * We have cycled through all the CPUs in the node;
+ * reset the alloced map.
+ */
+ cpumask_clear(alloced_mask);
+ }
+
+ cpumask_xor(&available_mask, alloced_mask,
+ cpumask_of_node(primary->numa_node));
+
+ cur_cpu = -1;
+ while (true) {
+ cur_cpu = cpumask_next(cur_cpu, &available_mask);
+ if (cur_cpu >= nr_cpu_ids) {
+ cur_cpu = -1;
+ cpumask_copy(&available_mask,
+ cpumask_of_node(primary->numa_node));
+ continue;
+ }
+
+ /*
+ * NOTE: in the case of sub-channel, we clear the sub-channel
+ * related bit(s) in primary->alloced_cpus_in_node in
+ * hv_process_channel_removal(), so when we reload drivers
+ * like hv_netvsc in SMP guest, here we're able to re-allocate
+ * bit from primary->alloced_cpus_in_node.
+ */
+ if (!cpumask_test_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node)) {
+ cpumask_set_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node);
+ cpumask_set_cpu(cur_cpu, alloced_mask);
+ break;
+ }
+ }
+
channel->target_cpu = cur_cpu;
channel->target_vp = hv_context.vp_index[cur_cpu];
}
/*
+ * vmbus_unload_response - Handler for the unload response.
+ */
+static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
+{
+ /*
+ * This is a global event; just wakeup the waiting thread.
+ * Once we successfully unload, we can cleanup the monitor state.
+ */
+ complete(&vmbus_connection.unload_event);
+}
+
+void vmbus_initiate_unload(void)
+{
+ struct vmbus_channel_message_header hdr;
+
+ /* Pre-Win2012R2 hosts don't support reconnect */
+ if (vmbus_proto_version < VERSION_WIN8_1)
+ return;
+
+ init_completion(&vmbus_connection.unload_event);
+ memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
+ hdr.msgtype = CHANNELMSG_UNLOAD;
+ vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));
+
+ wait_for_completion(&vmbus_connection.unload_event);
+}
+
+/*
* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
*
*/
@@ -461,8 +561,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
offer->connection_id;
}
- init_vp_index(newchannel, &offer->offer.if_type);
-
memcpy(&newchannel->offermsg, offer,
sizeof(struct vmbus_channel_offer_channel));
newchannel->monitor_grp = (u8)offer->monitorid / 32;
@@ -712,6 +810,7 @@ struct vmbus_channel_message_table_entry
{CHANNELMSG_INITIATE_CONTACT, 0, NULL},
{CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response},
{CHANNELMSG_UNLOAD, 0, NULL},
+ {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response},
};
/*