diff options
Diffstat (limited to 'kernel/drivers/hv/channel_mgmt.c')
-rw-r--r-- | kernel/drivers/hv/channel_mgmt.c | 195 |
1 files changed, 147 insertions, 48 deletions
diff --git a/kernel/drivers/hv/channel_mgmt.c b/kernel/drivers/hv/channel_mgmt.c index 0eeb1b3bc..652afd11a 100644 --- a/kernel/drivers/hv/channel_mgmt.c +++ b/kernel/drivers/hv/channel_mgmt.c @@ -32,6 +32,9 @@ #include "hyperv_vmbus.h" +static void init_vp_index(struct vmbus_channel *channel, + const uuid_le *type_guid); + /** * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message * @icmsghdrp: Pointer to msg header structure @@ -201,22 +204,38 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) spin_lock_irqsave(&vmbus_connection.channel_lock, flags); list_del(&channel->listentry); spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + primary_channel = channel; } else { primary_channel = channel->primary_channel; spin_lock_irqsave(&primary_channel->lock, flags); list_del(&channel->sc_list); + primary_channel->num_sc--; spin_unlock_irqrestore(&primary_channel->lock, flags); } + + /* + * We need to free the bit for init_vp_index() to work in the case + * of sub-channel, when we reload drivers like hv_netvsc. + */ + cpumask_clear_cpu(channel->target_cpu, + &primary_channel->alloced_cpus_in_node); + free_channel(channel); } void vmbus_free_channels(void) { - struct vmbus_channel *channel; + struct vmbus_channel *channel, *tmp; + + list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list, + listentry) { + /* if we don't set rescind to true, vmbus_close_internal() + * won't invoke hv_process_channel_removal(). + */ + channel->rescind = true; - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { vmbus_device_unregister(channel->device_obj); - free_channel(channel); } } @@ -228,7 +247,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) { struct vmbus_channel *channel; bool fnew = true; - bool enq = false; unsigned long flags; /* Make sure this is a new offer */ @@ -244,25 +262,12 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) } } - if (fnew) { + if (fnew) list_add_tail(&newchannel->listentry, &vmbus_connection.chn_list); - enq = true; - } spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); - if (enq) { - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, - percpu_channel_enq, - newchannel, true); - } else { - percpu_channel_enq(newchannel); - put_cpu(); - } - } if (!fnew) { /* * Check to see if this is a sub-channel. @@ -274,27 +279,22 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) newchannel->primary_channel = channel; spin_lock_irqsave(&channel->lock, flags); list_add_tail(&newchannel->sc_list, &channel->sc_list); - spin_unlock_irqrestore(&channel->lock, flags); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, - percpu_channel_enq, - newchannel, true); - } else { - percpu_channel_enq(newchannel); - put_cpu(); - } - - newchannel->state = CHANNEL_OPEN_STATE; channel->num_sc++; - if (channel->sc_creation_callback != NULL) - channel->sc_creation_callback(newchannel); + spin_unlock_irqrestore(&channel->lock, flags); + } else + goto err_free_chan; + } - return; - } + init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); - goto err_free_chan; + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, + percpu_channel_enq, + newchannel, true); + } else { + percpu_channel_enq(newchannel); + put_cpu(); } /* @@ -304,6 +304,12 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ newchannel->state = CHANNEL_OPEN_STATE; + if (!fnew) { + if (channel->sc_creation_callback != NULL) + channel->sc_creation_callback(newchannel); + return; + } + /* * Start the process of binding this offer to the driver * We need to set the DeviceObject field before calling @@ -351,6 +357,7 @@ enum { IDE = 0, SCSI, NIC, + ND_NIC, MAX_PERF_CHN, }; @@ -374,23 +381,28 @@ static const struct hv_vmbus_device_id hp_devs[] = { /* * We use this state to statically distribute the channel interrupt load. */ -static u32 next_vp; +static int next_numa_node_id; /* * Starting with Win8, we can statically distribute the incoming - * channel interrupt load by binding a channel to VCPU. We - * implement here a simple round robin scheme for distributing - * the interrupt load. - * We will bind channels that are not performance critical to cpu 0 and - * performance critical channels (IDE, SCSI and Network) will be uniformly - * distributed across all available CPUs. + * channel interrupt load by binding a channel to VCPU. + * We do this in a hierarchical fashion: + * First distribute the primary channels across available NUMA nodes + * and then distribute the subchannels amongst the CPUs in the NUMA + * node assigned to the primary channel. + * + * For pre-win8 hosts or non-performance critical channels we assign the + * first CPU in the first NUMA node. */ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) { u32 cur_cpu; int i; bool perf_chn = false; - u32 max_cpus = num_online_cpus(); + struct vmbus_channel *primary = channel->primary_channel; + int next_node; + struct cpumask available_mask; + struct cpumask *alloced_mask; for (i = IDE; i < MAX_PERF_CHN; i++) { if (!memcmp(type_guid->b, hp_devs[i].guid, @@ -407,16 +419,104 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui * Also if the channel is not a performance critical * channel, bind it to cpu 0. */ + channel->numa_node = 0; channel->target_cpu = 0; - channel->target_vp = 0; + channel->target_vp = hv_context.vp_index[0]; return; } - cur_cpu = (++next_vp % max_cpus); + + /* + * We distribute primary channels evenly across all the available + * NUMA nodes and within the assigned NUMA node we will assign the + * first available CPU to the primary channel. + * The sub-channels will be assigned to the CPUs available in the + * NUMA node evenly. + */ + if (!primary) { + while (true) { + next_node = next_numa_node_id++; + if (next_node == nr_node_ids) + next_node = next_numa_node_id = 0; + if (cpumask_empty(cpumask_of_node(next_node))) + continue; + break; + } + channel->numa_node = next_node; + primary = channel; + } + alloced_mask = &hv_context.hv_numa_map[primary->numa_node]; + + if (cpumask_weight(alloced_mask) == + cpumask_weight(cpumask_of_node(primary->numa_node))) { + /* + * We have cycled through all the CPUs in the node; + * reset the alloced map. + */ + cpumask_clear(alloced_mask); + } + + cpumask_xor(&available_mask, alloced_mask, + cpumask_of_node(primary->numa_node)); + + cur_cpu = -1; + while (true) { + cur_cpu = cpumask_next(cur_cpu, &available_mask); + if (cur_cpu >= nr_cpu_ids) { + cur_cpu = -1; + cpumask_copy(&available_mask, + cpumask_of_node(primary->numa_node)); + continue; + } + + /* + * NOTE: in the case of sub-channel, we clear the sub-channel + * related bit(s) in primary->alloced_cpus_in_node in + * hv_process_channel_removal(), so when we reload drivers + * like hv_netvsc in SMP guest, here we're able to re-allocate + * bit from primary->alloced_cpus_in_node. + */ + if (!cpumask_test_cpu(cur_cpu, + &primary->alloced_cpus_in_node)) { + cpumask_set_cpu(cur_cpu, + &primary->alloced_cpus_in_node); + cpumask_set_cpu(cur_cpu, alloced_mask); + break; + } + } + channel->target_cpu = cur_cpu; channel->target_vp = hv_context.vp_index[cur_cpu]; } /* + * vmbus_unload_response - Handler for the unload response. + */ +static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) +{ + /* + * This is a global event; just wakeup the waiting thread. + * Once we successfully unload, we can cleanup the monitor state. + */ + complete(&vmbus_connection.unload_event); +} + +void vmbus_initiate_unload(void) +{ + struct vmbus_channel_message_header hdr; + + /* Pre-Win2012R2 hosts don't support reconnect */ + if (vmbus_proto_version < VERSION_WIN8_1) + return; + + init_completion(&vmbus_connection.unload_event); + memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); + hdr.msgtype = CHANNELMSG_UNLOAD; + vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header)); + + wait_for_completion(&vmbus_connection.unload_event); +} + +/* * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. * */ @@ -461,8 +561,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) offer->connection_id; } - init_vp_index(newchannel, &offer->offer.if_type); - memcpy(&newchannel->offermsg, offer, sizeof(struct vmbus_channel_offer_channel)); newchannel->monitor_grp = (u8)offer->monitorid / 32; @@ -712,6 +810,7 @@ struct vmbus_channel_message_table_entry {CHANNELMSG_INITIATE_CONTACT, 0, NULL}, {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, {CHANNELMSG_UNLOAD, 0, NULL}, + {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, }; /* |