summaryrefslogtreecommitdiffstats
path: root/kernel/arch/x86/kernel/smpboot.c
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/arch/x86/kernel/smpboot.c
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/arch/x86/kernel/smpboot.c')
-rw-r--r--kernel/arch/x86/kernel/smpboot.c162
1 files changed, 102 insertions, 60 deletions
diff --git a/kernel/arch/x86/kernel/smpboot.c b/kernel/arch/x86/kernel/smpboot.c
index 50e547eac..fbabe4fcc 100644
--- a/kernel/arch/x86/kernel/smpboot.c
+++ b/kernel/arch/x86/kernel/smpboot.c
@@ -68,8 +68,7 @@
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -98,8 +97,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
-atomic_t init_deasserted;
-
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -147,16 +144,11 @@ static void smp_callin(void)
/*
* If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- *
- * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
+ * cpu_callout_mask guarantees we don't get here before
+ * an INIT_deassert IPI reaches our local APIC, so it is
+ * now safe to touch our local APIC.
*/
cpuid = smp_processor_id();
- if (apic->wait_for_init_deassert && cpuid)
- while (!atomic_read(&init_deasserted))
- cpu_relax();
/*
* (This works even if the APIC is not enabled.)
@@ -172,11 +164,6 @@ static void smp_callin(void)
apic_ap_setup();
/*
- * Need to setup vector mappings before we enable interrupts.
- */
- setup_vector_irq(smp_processor_id());
-
- /*
* Save our processor parameters. Note: this information
* is needed for clock calibration.
*/
@@ -240,18 +227,13 @@ static void notrace start_secondary(void *unused)
check_tsc_sync_target();
/*
- * Enable the espfix hack for this CPU
- */
-#ifdef CONFIG_X86_ESPFIX64
- init_espfix_ap();
-#endif
-
- /*
- * We need to hold vector_lock so there the set of online cpus
- * does not change while we are assigning vectors to cpus. Holding
- * this lock ensures we don't half assign or remove an irq from a cpu.
+ * Lock vector_lock and initialize the vectors on this cpu
+ * before setting the cpu online. We must set it online with
+ * vector_lock held to prevent a concurrent setup/teardown
+ * from seeing a half valid vector space.
*/
lock_vector_lock();
+ setup_vector_irq(smp_processor_id());
set_cpu_online(smp_processor_id(), true);
unlock_vector_lock();
cpu_set_state_online(smp_processor_id());
@@ -314,10 +296,10 @@ topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
-#define link_mask(_m, c1, c2) \
+#define link_mask(mfunc, c1, c2) \
do { \
- cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
- cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
+ cpumask_set_cpu((c1), mfunc(c2)); \
+ cpumask_set_cpu((c2), mfunc(c1)); \
} while (0)
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
@@ -398,9 +380,9 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
if (!has_mp) {
- cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
- cpumask_set_cpu(cpu, cpu_core_mask(cpu));
+ cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
c->booted_cores = 1;
return;
}
@@ -409,32 +391,34 @@ void set_cpu_sibling_map(int cpu)
o = &cpu_data(i);
if ((i == cpu) || (has_smt && match_smt(c, o)))
- link_mask(sibling, cpu, i);
+ link_mask(topology_sibling_cpumask, cpu, i);
if ((i == cpu) || (has_mp && match_llc(c, o)))
- link_mask(llc_shared, cpu, i);
+ link_mask(cpu_llc_shared_mask, cpu, i);
}
/*
* This needs a separate iteration over the cpus because we rely on all
- * cpu_sibling_mask links to be set-up.
+ * topology_sibling_cpumask links to be set-up.
*/
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
if ((i == cpu) || (has_mp && match_die(c, o))) {
- link_mask(core, cpu, i);
+ link_mask(topology_core_cpumask, cpu, i);
/*
* Does this new cpu bringup a new core?
*/
- if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
+ if (cpumask_weight(
+ topology_sibling_cpumask(cpu)) == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
*/
- if (cpumask_first(cpu_sibling_mask(i)) == i)
+ if (cpumask_first(
+ topology_sibling_cpumask(i)) == i)
c->booted_cores++;
/*
* increment the core count for all
@@ -514,6 +498,44 @@ void __inquire_remote_apic(int apicid)
}
/*
+ * The Multiprocessor Specification 1.4 (1997) example code suggests
+ * that there should be a 10ms delay between the BSP asserting INIT
+ * and de-asserting INIT, when starting a remote processor.
+ * But that slows boot and resume on modern processors, which include
+ * many cores and don't require that delay.
+ *
+ * Cmdline "init_cpu_udelay=" is available to over-ride this delay.
+ * Modern processor families are quirked to remove the delay entirely.
+ */
+#define UDELAY_10MS_DEFAULT 10000
+
+static unsigned int init_udelay = UINT_MAX;
+
+static int __init cpu_init_udelay(char *str)
+{
+ get_option(&str, &init_udelay);
+
+ return 0;
+}
+early_param("cpu_init_udelay", cpu_init_udelay);
+
+static void __init smp_quirk_init_udelay(void)
+{
+ /* if cmdline changed it from default, leave it alone */
+ if (init_udelay != UINT_MAX)
+ return;
+
+ /* if modern processor, use no delay */
+ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
+ ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
+ init_udelay = 0;
+ return;
+ }
+ /* else, use legacy delay */
+ init_udelay = UDELAY_10MS_DEFAULT;
+}
+
+/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
@@ -555,7 +577,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
- unsigned long send_status, accept_status = 0;
+ unsigned long send_status = 0, accept_status = 0;
int maxlvt, num_starts, j;
maxlvt = lapic_get_maxlvt();
@@ -583,7 +605,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
- mdelay(10);
+ udelay(init_udelay);
pr_debug("Deasserting INIT\n");
@@ -595,7 +617,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
send_status = safe_apic_wait_icr_idle();
mb();
- atomic_set(&init_deasserted, 1);
/*
* Should we send STARTUP IPIs ?
@@ -640,7 +661,10 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- udelay(300);
+ if (init_udelay == 0)
+ udelay(10);
+ else
+ udelay(300);
pr_debug("Startup point 1\n");
@@ -650,7 +674,11 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- udelay(200);
+ if (init_udelay == 0)
+ udelay(10);
+ else
+ udelay(200);
+
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
accept_status = (apic_read(APIC_ESR) & 0xEF);
@@ -792,8 +820,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
- per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
}
/*
@@ -820,6 +846,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;
+ /*
+ * Enable the espfix hack for this CPU
+ */
+#ifdef CONFIG_X86_ESPFIX64
+ init_espfix_ap(cpu);
+#endif
+
/* So we see what's up */
announce_cpu(cpu, apicid);
@@ -828,8 +861,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
* the targeted processor.
*/
- atomic_set(&init_deasserted, 0);
-
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
pr_debug("Setting warm reset code and vector.\n");
@@ -867,7 +898,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
if (!boot_error) {
/*
- * Wait 10s total for a response from AP
+ * Wait 10s total for first sign of life from AP
*/
boot_error = -1;
timeout = jiffies + 10*HZ;
@@ -880,7 +911,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
boot_error = 0;
break;
}
- udelay(100);
schedule();
}
}
@@ -896,7 +926,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
* for the MTRR work(triggered by the AP coming online)
* to be completed in the stop machine context.
*/
- udelay(100);
schedule();
}
}
@@ -961,8 +990,17 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
common_cpu_up(cpu, tidle);
+ /*
+ * We have to walk the irq descriptors to setup the vector
+ * space for the cpu which comes online. Prevent irq
+ * alloc/free across the bringup.
+ */
+ irq_lock_sparse();
+
err = do_boot_cpu(apicid, cpu, tidle);
+
if (err) {
+ irq_unlock_sparse();
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
return -EIO;
}
@@ -980,6 +1018,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
touch_nmi_watchdog();
}
+ irq_unlock_sparse();
+
return 0;
}
@@ -1009,8 +1049,8 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
physid_set_mask_of_physid(0, &phys_cpu_present_map);
- cpumask_set_cpu(0, cpu_sibling_mask(0));
- cpumask_set_cpu(0, cpu_core_mask(0));
+ cpumask_set_cpu(0, topology_sibling_cpumask(0));
+ cpumask_set_cpu(0, topology_core_cpumask(0));
}
enum {
@@ -1176,6 +1216,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
uv_system_init();
set_mtrr_aps_delayed_init();
+
+ smp_quirk_init_udelay();
}
void arch_enable_nonboot_cpus_begin(void)
@@ -1293,28 +1335,28 @@ static void remove_siblinginfo(int cpu)
int sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- for_each_cpu(sibling, cpu_core_mask(cpu)) {
- cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
+ for_each_cpu(sibling, topology_core_cpumask(cpu)) {
+ cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
/*/
* last thread sibling in this cpu core going down
*/
- if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
+ if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
cpu_data(sibling).booted_cores--;
}
- for_each_cpu(sibling, cpu_sibling_mask(cpu))
- cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+ for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+ cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
cpumask_clear(cpu_llc_shared_mask(cpu));
- cpumask_clear(cpu_sibling_mask(cpu));
- cpumask_clear(cpu_core_mask(cpu));
+ cpumask_clear(topology_sibling_cpumask(cpu));
+ cpumask_clear(topology_core_cpumask(cpu));
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
}
-static void __ref remove_cpu_from_maps(int cpu)
+static void remove_cpu_from_maps(int cpu)
{
set_cpu_online(cpu, false);
cpumask_clear_cpu(cpu, cpu_callout_mask);