Commit e1367daf authored by Li Shaohua, committed by Linus Torvalds

[PATCH] cpu state clean after hot remove

Clean CPU states in order to reuse smp boot code for CPU hotplug.

Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Parent 0bb3184d
@@ -651,3 +651,15 @@ void __devinit cpu_init(void)
 	clear_used_math();
 	mxcsr_feature_mask_init();
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __devinit cpu_uninit(void)
+{
+	int cpu = raw_smp_processor_id();
+	cpu_clear(cpu, cpu_initialized);
+
+	/* lazy TLB state */
+	per_cpu(cpu_tlbstate, cpu).state = 0;
+	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
@@ -156,6 +156,11 @@ void irq_ctx_init(int cpu)
 	       cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
 }
 
+void irq_ctx_exit(int cpu)
+{
+	hardirq_ctx[cpu] = NULL;
+}
+
 extern asmlinkage void __do_softirq(void);
 
 asmlinkage void do_softirq(void)
@@ -152,21 +152,19 @@ static void poll_idle (void)
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
+	/* This must be done before dead CPU ack */
+	cpu_exit_clear();
+	wbinvd();
+	mb();
 	/* Ack it */
 	__get_cpu_var(cpu_state) = CPU_DEAD;
 
-	/* We shouldn't have to disable interrupts while dead, but
-	 * some interrupts just don't seem to go away, and this makes
-	 * it "work" for testing purposes. */
-	/* Death loop */
-	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
-		cpu_relax();
-
+	/*
+	 * With physical CPU hotplug, we should halt the cpu
+	 */
 	local_irq_disable();
-	__flush_tlb_all();
-	cpu_set(smp_processor_id(), cpu_online_map);
-	enable_APIC_timer();
-	local_irq_enable();
+	while (1)
+		__asm__ __volatile__("hlt":::"memory");
 }
 #else
 static inline void play_dead(void)
@@ -90,6 +90,12 @@ cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
 static cpumask_t smp_commenced_mask;
 
+/* TSC's upper 32 bits can't be written in earlier CPUs (before Prescott), so there
+ * is no way to resync one AP against the BP. TBD: for Prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -427,7 +433,7 @@ static void __devinit smp_callin(void)
 	/*
 	 * Synchronize the TSC with the BP
 	 */
-	if (cpu_has_tsc && cpu_khz)
+	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
 		synchronize_tsc_ap();
 }
@@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused)
 	lock_ipi_call_lock();
 	cpu_set(smp_processor_id(), cpu_online_map);
 	unlock_ipi_call_lock();
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
@@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 #endif	/* WAKE_SECONDARY_VIA_INIT */
 
 extern cpumask_t cpu_initialized;
+
+static inline int alloc_cpu_id(void)
+{
+	cpumask_t tmp_map;
+	int cpu;
+	cpus_complement(tmp_map, cpu_present_map);
+	cpu = first_cpu(tmp_map);
+	if (cpu >= NR_CPUS)
+		return -ENODEV;
+	return cpu;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+	struct task_struct *idle;
+
+	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+		/* initialize thread_struct; we really want to avoid destroying
+		 * the idle thread
+		 */
+		idle->thread.esp = (unsigned long)(((struct pt_regs *)
+			(THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
+		init_idle(idle, cpu);
+		return idle;
+	}
+	idle = fork_idle(cpu);
+	if (!IS_ERR(idle))
+		cpu_idle_tasks[cpu] = idle;
+	return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
+
-static int __devinit do_boot_cpu(int apicid)
+static int __devinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
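For illustration only (this snippet is not part of the patch): alloc_cpu_id() above simply picks the lowest CPU number missing from cpu_present_map, by complementing the mask and taking its first set bit. Below is a minimal user-space sketch of the same idea, with a plain unsigned long standing in for cpumask_t and an arbitrary NR_CPUS of 8.

#include <stdio.h>

#define NR_CPUS 8

/* pick the lowest CPU id that is not set in present_map, like alloc_cpu_id() */
static int alloc_cpu_id(unsigned long present_map)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (!(present_map & (1UL << cpu)))	/* first bit of ~present_map */
			return cpu;
	return -1;	/* the kernel version returns -ENODEV here */
}

int main(void)
{
	/* CPUs 0 and 1 present: the next free slot is 2 */
	printf("next cpu id: %d\n", alloc_cpu_id(0x3UL));
	return 0;
}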
@@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid)
 {
 	struct task_struct *idle;
 	unsigned long boot_error;
-	int timeout, cpu;
+	int timeout;
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	cpu = ++cpucount;
+	++cpucount;
+
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
 	 */
-	idle = fork_idle(cpu);
+	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	idle->thread.eip = (unsigned long) start_secondary;
@@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid)
 			inquire_remote_apic(apicid);
 		}
 	}
-	x86_cpu_to_apicid[cpu] = apicid;
+
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		unmap_cpu_to_logical_apicid(cpu);
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
 		cpucount--;
+	} else {
+		x86_cpu_to_apicid[cpu] = apicid;
+		cpu_set(cpu, cpu_present_map);
 	}
 
 	/* mark "stuck" area as not stuck */
@@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid)
 	return boot_error;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	idle_task_exit();
+
+	cpucount --;
+	cpu_uninit();
+	irq_ctx_exit(cpu);
+
+	cpu_clear(cpu, cpu_callout_map);
+	cpu_clear(cpu, cpu_callin_map);
+	cpu_clear(cpu, cpu_present_map);
+
+	cpu_clear(cpu, smp_commenced_mask);
+	unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+	struct completion *complete;
+	int apicid;
+	int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+	struct warm_boot_cpu_info *info = p;
+	do_boot_cpu(info->apicid, info->cpu);
+	complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int cpu)
+{
+	DECLARE_COMPLETION(done);
+	struct warm_boot_cpu_info info;
+	struct work_struct task;
+	int apicid, ret;
+
+	lock_cpu_hotplug();
+	apicid = x86_cpu_to_apicid[cpu];
+	if (apicid == BAD_APICID) {
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	info.complete = &done;
+	info.apicid = apicid;
+	info.cpu = cpu;
+	INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+	tsc_sync_disabled = 1;
+
+	/* init low mem mapping */
+	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+			sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+	flush_tlb_all();
+	schedule_work(&task);
+	wait_for_completion(&done);
+
+	tsc_sync_disabled = 0;
+	zap_low_mappings();
+	ret = 0;
+exit:
+	unlock_cpu_hotplug();
+	return ret;
+}
+#endif
 static void smp_tune_scheduling (void)
 {
 	unsigned long cachesize;	/* kB */
@@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		if (max_cpus <= cpucount+1)
 			continue;
 
-		if (do_boot_cpu(apicid))
+		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
 			printk("CPU #%d not responding - cannot use it.\n",
 								apicid);
 		else
@@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_callout_map);
+	cpu_set(smp_processor_id(), cpu_present_map);
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-
-/* must be called with the cpucontrol mutex held */
-static int __devinit cpu_enable(unsigned int cpu)
+static void
+remove_siblinginfo(int cpu)
 {
-	/* get the target out of its holding state */
-	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-	wmb();
-
-	/* wait for the processor to ack it. timeout? */
-	while (!cpu_online(cpu))
-		cpu_relax();
-
-	fixup_irqs(cpu_online_map);
-	/* counter the disable in fixup_irqs() */
-	local_irq_enable();
-	return 0;
+	int sibling;
+
+	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+		cpu_clear(cpu, cpu_sibling_map[sibling]);
+	for_each_cpu_mask(sibling, cpu_core_map[cpu])
+		cpu_clear(cpu, cpu_core_map[sibling]);
+	cpus_clear(cpu_sibling_map[cpu]);
+	cpus_clear(cpu_core_map[cpu]);
+	phys_proc_id[cpu] = BAD_APICID;
+	cpu_core_id[cpu] = BAD_APICID;
 }
 
 int __cpu_disable(void)
@@ -1193,6 +1307,8 @@ int __cpu_disable(void)
 	mdelay(1);
 	local_irq_disable();
+	remove_siblinginfo(cpu);
+
 	cpu_clear(cpu, map);
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
@@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu)
 	for (i = 0; i < 10; i++) {
 		/* They ack this in play_dead by setting CPU_DEAD */
-		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+			printk ("CPU %d is now offline\n", cpu);
 			return;
+		}
 		current->state = TASK_UNINTERRUPTIBLE;
 		schedule_timeout(HZ/10);
 	}
@@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu)
 		return -EIO;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Already up, and in cpu_quiescent now? */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		cpu_enable(cpu);
-		return 0;
-	}
-#endif
-
 	local_irq_enable();
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
@@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	setup_ioapic_dest();
 #endif
 	zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
 	/*
 	 * Disable executability of the SMP trampoline:
 	 */
 	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
 void __init smp_intr_init(void)
@@ -16,6 +16,10 @@ struct sysdev_class cpu_sysdev_class = {
 EXPORT_SYMBOL(cpu_sysdev_class);
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifndef __HAVE_ARCH_SMP_PREPARE_CPU
+#define smp_prepare_cpu(cpu)	(0)
+#endif
+
 static ssize_t show_online(struct sys_device *dev, char *buf)
 {
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
@@ -36,7 +40,9 @@ static ssize_t store_online(struct sys_device *dev, const char *buf,
 		kobject_hotplug(&dev->kobj, KOBJ_OFFLINE);
 		break;
 	case '1':
-		ret = cpu_up(cpu->sysdev.id);
+		ret = smp_prepare_cpu(cpu->sysdev.id);
+		if (ret == 0)
+			ret = cpu_up(cpu->sysdev.id);
 		break;
 	default:
 		ret = -EINVAL;
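For context (again, not part of the patch): with this change, writing '1' to a CPU's sysfs online attribute first warm-boots the processor through smp_prepare_cpu() and only then calls cpu_up(). Below is a minimal user-space sketch that drives the store_online() path above, assuming a kernel built with CONFIG_HOTPLUG_CPU, sysfs mounted at /sys, root privileges, and CPU 1 chosen arbitrarily.

#include <stdio.h>

/* write '0' or '1' to /sys/devices/system/cpu/cpuN/online (needs root) */
static int set_cpu_online(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fputc(online ? '1' : '0', f);	/* handled by store_online() above */
	fclose(f);
	return 0;
}

int main(void)
{
	if (set_cpu_online(1, 0))	/* offline: __cpu_disable() + play_dead() */
		return 1;
	if (set_cpu_online(1, 1))	/* online: smp_prepare_cpu() + cpu_up() */
		return 1;
	return 0;
}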
@@ -29,9 +29,11 @@ extern void release_vm86_irqs(struct task_struct *);
 
 #ifdef CONFIG_4KSTACKS
 extern void irq_ctx_init(int cpu);
+extern void irq_ctx_exit(int cpu);
 # define __ARCH_HAS_DO_SOFTIRQ
 #else
 # define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
 #endif
 
 #ifdef CONFIG_IRQBALANCE
@@ -48,6 +48,14 @@ extern void unlock_ipi_call_lock(void);
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
 
+#ifdef CONFIG_HOTPLUG_CPU
+extern void cpu_exit_clear(void);
+extern void cpu_uninit(void);
+
+#define __HAVE_ARCH_SMP_PREPARE_CPU
+extern int smp_prepare_cpu(int cpu);
+#endif
+
 /*
  * This function is needed by all SMP systems. It must _always_ be valid
  * from the initial startup. We map APIC_BASE very early in page_setup(),