smpboot_32.c 32.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *		Martin J. Bligh	: 	Added support for multi-quad systems
 *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
*		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/bootmem.h>
Z
Zwane Mwaikambo 已提交
44 45 46
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
47
#include <linux/nmi.h>
L
Linus Torvalds 已提交
48 49 50 51 52 53

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
54
#include <asm/nmi.h>
L
Linus Torvalds 已提交
55 56 57 58

#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
59
#include <asm/vmi.h>
60
#include <asm/mtrr.h>
L
Linus Torvalds 已提交
61 62

/* Set if we find a B stepping CPU */
static int __devinitdata smp_b_stepping;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);

/* Last level cache ID of each logical CPU */
int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);

/* APs set their bit in cpu_callin_map once booted; the BSP sets
 * cpu_callout_map to authorize each AP to proceed (see smp_callin()). */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
/* CPUs released by the BSP to enter the idle loop (see start_secondary()). */
static cpumask_t smp_commenced_mask;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);

/*
 * The following static array is used during kernel startup
 * and the x86_cpu_to_apicid_ptr contains the address of the
 * array during this time.  Is it zeroed when the per_cpu
 * data area is removed.
 */
u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
			{ [0 ... NR_CPUS-1] = BAD_APICID };
void *x86_cpu_to_apicid_ptr;
DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);

/* APIC id -> NUMA node translation table. */
u8 apicid_2_node[MAX_APICID];

/*
 * Trampoline 80x86 program as an array.
 */
extern const unsigned char trampoline_data [];
extern const unsigned char trampoline_end  [];
/* Low-memory page the trampoline is copied into (see smp_alloc_memory()). */
static unsigned char *trampoline_base;
/* Previous exec permission of the trampoline page, as returned by
 * set_kernel_exec(); presumably used to restore it later — not visible here. */
static int trampoline_exec;

static void map_cpu_to_logical_apicid(void);

/* State of each CPU. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */
static unsigned long __cpuinit setup_trampoline(void)
{
	/* Copy the 16-bit boot trampoline into the low page and return its
	 * physical address, which becomes the AP's startup vector. */
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}

/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();
	/*
	 * Make the SMP trampoline executable:
	 */
	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */
void __cpuinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	/* Start from the boot CPU's data; secondaries then re-identify. */
	*c = boot_cpu_data;
	if (id!=0)
		identify_secondary_cpu(c);
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		/*
		 * Remember we have B step Pentia with bugs
		 */
		smp_b_stepping = 1;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

		/* UP kernels need no MP certification. */
		if (num_possible_cpus() == 1)
			goto valid_k7;

		/* Athlon 660/661 is valid. */
		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
			goto valid_k7;

		/* Duron 670 is valid */
		if ((c->x86_model==7) && (c->x86_mask==0))
			goto valid_k7;

		/*
		 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
		 * It's worth noting that the A5 stepping (662) of some Athlon XP's
		 * have the MP bit set.
		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
		 */
		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
		     (c->x86_model> 7))
			if (cpu_has_mp)
				goto valid_k7;

		/* If we get here, it's not a certified SMP capable AMD system. */
		add_taint(TAINT_UNSAFE_SMP);
	}

valid_k7:
	;
}

extern void calibrate_delay(void);

/* Set (by wakeup_secondary_cpu()) once the INIT-deassert has been sent;
 * APs on 82489DX systems spin on it in smp_callin(). */
static atomic_t init_deasserted;

/*
 * AP-side half of the boot handshake: wait for the BSP's callout,
 * program the local APIC, calibrate bogomips, store cpuinfo and
 * finally report in via cpu_callin_map.
 */
static void __cpuinit smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC.  We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	wait_for_init_deassert(&init_deasserted);

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		printk("huh, phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
		BUG();
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished it's STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		rep_nop();
	}

	if (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		BUG();
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	smp_callin_clear_local_apic();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n",&cpuid);

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}

/* Number of secondary CPUs kicked via do_boot_cpu() (decremented on
 * boot failure and in cpu_exit_clear()). */
static int cpucount;

302 303 304 305 306 307
/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
	struct cpuinfo_x86 *c = cpu_data + cpu;
	/*
	 * For perf, we return last level cache shared map.
308
	 * And for power savings, we return cpu_core_map
309
	 */
310
	if (sched_mc_power_savings || sched_smt_power_savings)
311
		return per_cpu(cpu_core_map, cpu);
312 313
	else
		return c->llc_shared_map;
314 315
}

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

/*
 * Build the HT-sibling, core and last-level-cache sharing maps for
 * @cpu against every cpu already in cpu_sibling_setup_map, and keep
 * booted_cores counts consistent across the package.
 */
void __cpuinit set_cpu_sibling_map(int cpu)
{
	int i;
	struct cpuinfo_x86 *c = cpu_data;

	cpu_set(cpu, cpu_sibling_setup_map);

	if (smp_num_siblings > 1) {
		/* HT: siblings share both package and core id. */
		for_each_cpu_mask(i, cpu_sibling_setup_map) {
			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
				cpu_set(i, per_cpu(cpu_core_map, cpu));
				cpu_set(cpu, per_cpu(cpu_core_map, i));
				cpu_set(i, c[cpu].llc_shared_map);
				cpu_set(cpu, c[i].llc_shared_map);
			}
		}
	} else {
		/* No HT: a cpu is its only sibling. */
		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
	}

	/* A cpu always shares its LLC with itself. */
	cpu_set(cpu, c[cpu].llc_shared_map);

	if (current_cpu_data.x86_max_cores == 1) {
		/* Single-core package: core map == sibling map, done. */
		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
		c[cpu].booted_cores = 1;
		return;
	}

	for_each_cpu_mask(i, cpu_sibling_setup_map) {
		/* Same last-level cache -> share llc_shared_map. */
		if (cpu_llc_id[cpu] != BAD_APICID &&
		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
			cpu_set(i, c[cpu].llc_shared_map);
			cpu_set(cpu, c[i].llc_shared_map);
		}
		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
			cpu_set(i, per_cpu(cpu_core_map, cpu));
			cpu_set(cpu, per_cpu(cpu_core_map, i));
			/*
			 *  Does this new cpu bringup a new core?
			 */
			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
					c[cpu].booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					c[i].booted_cores++;
			} else if (i != cpu && !c[cpu].booted_cores)
				c[cpu].booted_cores = c[i].booted_cores;
		}
	}
}

/*
 * Activate a secondary processor.
 */
static void __cpuinit start_secondary(void *unused)
{
	/*
	 * Don't put *anything* before cpu_init(), SMP booting is too
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
#ifdef CONFIG_VMI
	vmi_bringup();
#endif
	cpu_init();
	preempt_disable();
	/* Handshake with the BSP, then wait to be released. */
	smp_callin();
	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
		rep_nop();
	/*
	 * Check TSC synchronization with the BP:
	 */
	check_tsc_sync_target();

	setup_secondary_clock();
	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}
	/*
	 * low-memory mappings have been cleared, flush them from
	 * the local TLBs too.
	 */
	local_flush_tlb();

	/* This must be done before setting cpu_online_map */
	set_cpu_sibling_map(raw_smp_processor_id());
	wmb();

	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines number of
	 * IPI receipients, and the time when the determination is made
	 * for which cpus receive the IPI. Holding this
	 * lock helps us to not include this cpu in a currently in progress
	 * smp_call_function().
	 */
	lock_ipi_call_lock();
	cpu_set(smp_processor_id(), cpu_online_map);
	unlock_ipi_call_lock();
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();
	/* Never returns: enter the per-cpu idle loop. */
	cpu_idle();
}

/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure
 * This function must not return.
 */
void __devinit initialize_secondary(void)
{
	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */

	/* Switch to the idle task's saved stack and jump to its saved eip
	 * (set to start_secondary() by do_boot_cpu()). */
	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
		:"m" (current->thread.esp),"m" (current->thread.eip));
}

/* Static state in head.S used to set up a CPU */
extern struct {
	void * esp;
	unsigned short ss;
} stack_start;

#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
EXPORT_SYMBOL(node_2_cpu_mask);
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
	printk("Mapping cpu %d to node %d\n", cpu, node);
	cpu_set(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = node;
}

/* undo a mapping between cpu and node. */
static inline void unmap_cpu_to_node(int cpu)
{
	int node;

	printk("Unmapping cpu %d from all nodes\n", cpu);
	for (node = 0; node < MAX_NUMNODES; node ++)
		cpu_clear(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

/* Non-NUMA builds: cpu<->node mapping is a no-op. */
#define map_cpu_to_node(cpu, node)	({})
#define unmap_cpu_to_node(cpu)	({})

#endif /* CONFIG_NUMA */

/* Logical APIC id of each CPU; BAD_APICID until the CPU registers itself. */
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };

/* Record the calling CPU's logical APIC id and attach it to its NUMA node. */
static void map_cpu_to_logical_apicid(void)
{
	int cpu = smp_processor_id();
	int apicid = logical_smp_processor_id();
	int node = apicid_to_node(apicid);

	/* Fall back to any online node if the reported node is offline. */
	if (!node_online(node))
		node = first_online_node;

	cpu_2_logical_apicid[cpu] = apicid;
	map_cpu_to_node(cpu, node);
}

/* Reverse map_cpu_to_logical_apicid() for a CPU being torn down. */
static void unmap_cpu_to_logical_apicid(int cpu)
{
	cpu_2_logical_apicid[cpu] = BAD_APICID;
	unmap_cpu_to_node(cpu);
}

/*
 * Debug helper: use APIC remote-read IPIs to fetch and print the ID,
 * VERSION and SPIV registers of a remote (possibly wedged) APIC.
 */
static inline void __inquire_remote_apic(int apicid)
{
	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout;
	unsigned long status;

	printk("Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		status = safe_apic_wait_icr_idle();
		if (status)
			printk("a previous APIC delivery may have failed\n");

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		/* Poll up to ~100ms for the remote read to complete. */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%lx\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}

#ifdef WAKE_SECONDARY_VIA_NMI
/* 
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 */
L
Li Shaohua 已提交
567
static int __devinit
L
Linus Torvalds 已提交
568 569
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
570 571
	unsigned long send_status, accept_status = 0;
	int maxlvt;
L
Linus Torvalds 已提交
572 573 574 575 576 577 578 579 580

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
581
	send_status = safe_apic_wait_icr_idle();
L
Linus Torvalds 已提交
582 583 584 585 586 587 588 589

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	/*
	 * Due to the Pentium erratum 3AP.
	 */
590
	maxlvt = lapic_get_maxlvt();
L
Linus Torvalds 已提交
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
	if (maxlvt > 3) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_NMI */

#ifdef WAKE_SECONDARY_VIA_INIT
L
Li Shaohua 已提交
608
static int __devinit
L
Linus Torvalds 已提交
609 610
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
611 612
	unsigned long send_status, accept_status = 0;
	int maxlvt, num_starts, j;
L
Linus Torvalds 已提交
613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
637
	send_status = safe_apic_wait_icr_idle();
L
Linus Torvalds 已提交
638 639 640 641 642 643 644 645 646 647 648 649

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
650
	send_status = safe_apic_wait_icr_idle();
L
Linus Torvalds 已提交
651 652 653 654 655 656 657 658 659 660 661 662 663 664

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

665 666 667 668 669 670 671
	/*
	 * Paravirt / VMI wants a startup IPI hook here to set up the
	 * target processor state.
	 */
	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
		         (unsigned long) stack_start.esp);

L
Linus Torvalds 已提交
672 673 674 675 676
	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

677
	maxlvt = lapic_get_maxlvt();
L
Linus Torvalds 已提交
678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n",j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
706
		send_status = safe_apic_wait_icr_idle();
L
Linus Torvalds 已提交
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_INIT */

extern cpumask_t cpu_initialized;
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
static inline int alloc_cpu_id(void)
{
	cpumask_t	tmp_map;
	int cpu;
	cpus_complement(tmp_map, cpu_present_map);
	cpu = first_cpu(tmp_map);
	if (cpu >= NR_CPUS)
		return -ENODEV;
	return cpu;
}

#ifdef CONFIG_HOTPLUG_CPU
static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
static inline struct task_struct * alloc_idle_task(int cpu)
{
	struct task_struct *idle;

	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
		/* initialize thread_struct.  we really want to avoid destroy
		 * idle tread
		 */
A
akpm@osdl.org 已提交
756
		idle->thread.esp = (unsigned long)task_pt_regs(idle);
757 758 759 760 761 762 763 764 765 766 767 768
		init_idle(idle, cpu);
		return idle;
	}
	idle = fork_idle(cpu);

	if (!IS_ERR(idle))
		cpu_idle_tasks[cpu] = idle;
	return idle;
}
#else
#define alloc_idle_task(cpu) fork_idle(cpu)
#endif
L
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 */
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout;
	unsigned long start_eip;
	unsigned short nmi_high = 0, nmi_low = 0;

	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
	 */
	mtrr_save_state();

	/*
	 * We can't use kernel_thread since we must avoid to
	 * reschedule the child.
	 */
	idle = alloc_idle_task(cpu);
	if (IS_ERR(idle))
		panic("failed fork for CPU %d", cpu);

	/* Per-cpu GDT and current_task must exist before the AP runs. */
	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);

	idle->thread.eip = (unsigned long) start_secondary;
	/* start_eip had better be page-aligned! */
	start_eip = setup_trampoline();

	++cpucount;
	alternatives_smp_switch(1);

	/* So we see what's up   */
	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
	/* Stack for startup_32 can be just as for start_secondary onwards */
	stack_start.esp = (void *) idle->thread.esp;

	irq_ctx_init(cpu);

	per_cpu(x86_cpu_to_apicid, cpu) = apicid;
	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

	store_NMI_vector(&nmi_high, &nmi_low);

	smpboot_setup_warm_reset_vector(start_eip);

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_cpu(apicid, start_eip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			printk("CPU%d: ", cpu);
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		} else {
			boot_error= 1;
			/* The trampoline writes 0xA5 as a progress marker. */
			if (*((volatile unsigned char *)trampoline_base)
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
			inquire_remote_apic(apicid);
		}
	}

	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
	} else {
		/* NOTE(review): apicid was already stored above before the
		 * wakeup; this second store is redundant but harmless. */
		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
		cpu_set(cpu, cpu_present_map);
	}

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)trampoline_base) = 0;

	return boot_error;
}

#ifdef CONFIG_HOTPLUG_CPU
/* Undo per-CPU bookkeeping for the calling CPU as it goes offline. */
void cpu_exit_clear(void)
{
	int cpu = raw_smp_processor_id();

	idle_task_exit();

	cpucount --;
	cpu_uninit();
	irq_ctx_exit(cpu);

	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);

	cpu_clear(cpu, smp_commenced_mask);
	unmap_cpu_to_logical_apicid(cpu);
}

/* Arguments handed to the workqueue item that re-boots a hotplugged CPU. */
struct warm_boot_cpu_info {
	struct completion *complete;
	struct work_struct task;
	int apicid;
	int cpu;
};

/* Workqueue callback: boot the CPU described by @work, then signal done. */
static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
{
	struct warm_boot_cpu_info *info =
		container_of(work, struct warm_boot_cpu_info, task);
	/* NOTE(review): do_boot_cpu()'s return value is ignored here;
	 * a failed warm boot is not propagated to the caller. */
	do_boot_cpu(info->apicid, info->cpu);
	complete(info->complete);
}

/*
 * Re-prepare a previously-offlined CPU: restore the low identity
 * mapping the trampoline needs, boot the CPU via the workqueue, then
 * tear the low mapping down again.  Returns 0 or -ENODEV.
 */
static int __cpuinit __smp_prepare_cpu(int cpu)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct warm_boot_cpu_info info;
	int	apicid, ret;

	apicid = per_cpu(x86_cpu_to_apicid, cpu);
	if (apicid == BAD_APICID) {
		ret = -ENODEV;
		goto exit;
	}

	info.complete = &done;
	info.apicid = apicid;
	info.cpu = cpu;
	INIT_WORK(&info.task, do_warm_boot_cpu);

	/* init low mem mapping */
	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
	flush_tlb_all();
	schedule_work(&info.task);
	wait_for_completion(&done);

	zap_low_mappings();
	ret = 0;
exit:
	return ret;
}
#endif

L
Linus Torvalds 已提交
951 952 953 954 955 956 957
/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
958 959 960
#ifdef CONFIG_X86_NUMAQ
EXPORT_SYMBOL(xquad_portio);
#endif
L
Linus Torvalds 已提交
961 962 963 964 965 966 967 968 969 970 971 972 973

/*
 * Main SMP boot routine: record the boot CPU's data, bail out to
 * uniprocessor-with-dummy-APIC mode on the various failure paths
 * (no MP config, no local APIC, max_cpus==0), otherwise kick each
 * present AP via do_boot_cpu() and finish IO-APIC/clock setup.
 */
static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int apicid, cpu, bit, kicked;
	unsigned long bogosum = 0;

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
	boot_cpu_logical_apicid = logical_smp_processor_id();
	per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid;

	current_thread_info()->cpu = 0;

	set_cpu_sibling_map(0);

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		map_cpu_to_logical_apicid();
		cpu_set(0, per_cpu(cpu_sibling_map, 0));
		cpu_set(0, per_cpu(cpu_core_map, 0));
		return;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_physical_apicid);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		map_cpu_to_logical_apicid();
		cpu_set(0, per_cpu(cpu_sibling_map, 0));
		cpu_set(0, per_cpu(cpu_core_map, 0));
		return;
	}

	verify_local_APIC();

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		smp_found_config = 0;
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");

		if (nmi_watchdog == NMI_LOCAL_APIC) {
			printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n");
			connect_bsp_APIC();
			setup_local_APIC();
		}
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		map_cpu_to_logical_apicid();
		cpu_set(0, per_cpu(cpu_sibling_map, 0));
		cpu_set(0, per_cpu(cpu_core_map, 0));
		return;
	}

	connect_bsp_APIC();
	setup_local_APIC();
	map_cpu_to_logical_apicid();


	setup_portio_remap();

	/*
	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
	 *
	 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
	 * clustered apic ID.
	 */
	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));

	kicked = 1;	/* the boot CPU counts as already kicked */
	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
		apicid = cpu_present_to_apicid(bit);
		/*
		 * Don't even attempt to start the boot CPU!
		 */
		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
			continue;

		if (!check_apicid_present(bit))
			continue;
		if (max_cpus <= cpucount+1)
			continue;

		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
			printk("CPU #%d not responding - cannot use it.\n",
								apicid);
		else
			++kicked;
	}

	/*
	 * Cleanup possible dangling ends...
	 */
	smpboot_restore_warm_reset_vector();

	/*
	 * Allow the user to impress friends.
	 */
	Dprintk("Before bogomips.\n");
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_isset(cpu, cpu_callout_map))
			bogosum += cpu_data[cpu].loops_per_jiffy;
	printk(KERN_INFO
		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		cpucount+1,
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);
	
	Dprintk("Before bogocount - setting activated=1.\n");

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	if (tainted & TAINT_UNSAFE_SMP) {
		if (cpucount)
			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
		else
			tainted &= ~TAINT_UNSAFE_SMP;
	}

	Dprintk("Boot done.\n");

	/*
	 * construct cpu_sibling_map, so that we can tell sibling CPUs
	 * efficiently.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		cpus_clear(per_cpu(cpu_core_map, cpu));
	}

	cpu_set(0, per_cpu(cpu_sibling_map, 0));
	cpu_set(0, per_cpu(cpu_core_map, 0));

	smpboot_setup_io_apic();

	setup_boot_clock();
}

/* These are wrappers to interface to the new boot process.  Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
	/* seed the masks with the boot CPU before the APs start checking in */
	smp_commenced_mask = cpumask_of_cpu(0);
	cpu_callin_map = cpumask_of_cpu(0);
	mb();
	smp_boot_cpus(max_cpus);
}

1147
/*
 * Set up the boot CPU: switch it to its per-cpu GDT and mark it
 * present/possible/online in all the relevant cpu masks.
 */
void __init native_smp_prepare_boot_cpu(void)
{
	unsigned int cpu = smp_processor_id();

	init_gdt(cpu);
	switch_to_new_gdt();

	cpu_set(cpu, cpu_online_map);
	cpu_set(cpu, cpu_callout_map);
	cpu_set(cpu, cpu_present_map);
	cpu_set(cpu, cpu_possible_map);
	__get_cpu_var(cpu_state) = CPU_ONLINE;
}

Z
Zwane Mwaikambo 已提交
1161
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove a dying cpu from the sibling/core maps of every other cpu
 * that references it, and clear its own maps.  Also maintains the
 * booted_cores count of the remaining core siblings.
 */
void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = cpu_data;

	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
		/*
		 * last thread sibling in this cpu core going down
		 */
		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
			c[sibling].booted_cores--;
	}
			
	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
	cpus_clear(per_cpu(cpu_sibling_map, cpu));
	cpus_clear(per_cpu(cpu_core_map, cpu));
	c[cpu].phys_proc_id = 0;
	c[cpu].cpu_core_id = 0;
	cpu_clear(cpu, cpu_sibling_setup_map);
}

/*
 * Take the current CPU offline: stop its NMI watchdog and local APIC,
 * drain pending timer interrupts, drop its sibling info and remove it
 * from the online map after re-routing its irqs.  Returns -EBUSY for
 * the boot processor, 0 otherwise.
 */
int __cpu_disable(void)
{
	cpumask_t map = cpu_online_map;
	int cpu = smp_processor_id();

	/*
	 * Perhaps use cpufreq to drop frequency, but that could go
	 * into generic code.
	 *
	 * We won't take down the boot processor on i386 due to some
	 * interrupts only being able to be serviced by the BSP.
	 * Especially so if we're not using an IOAPIC	-zwane
	 */
	if (cpu == 0)
		return -EBUSY;
	if (nmi_watchdog == NMI_LOCAL_APIC)
		stop_apic_nmi_watchdog(NULL);
	clear_local_APIC();
	/* Allow any queued timer interrupts to get serviced */
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	remove_siblinginfo(cpu);

	cpu_clear(cpu, map);
	fixup_irqs(map);
	/* It's now safe to remove this processor from the online map */
	cpu_clear(cpu, cpu_online_map);
	return 0;
}

/*
 * Wait (up to ~1 second, polling every 100ms) for the dying CPU to
 * mark itself CPU_DEAD.  When the last other CPU goes away, switch
 * the SMP alternatives back to their UP variants.
 */
void __cpu_die(unsigned int cpu)
{
	/* We don't do anything here: idle task is faking death itself. */
	unsigned int i;

	for (i = 0; i < 10; i++) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk ("CPU %d is now offline\n", cpu);
			if (1 == num_online_cpus())
				alternatives_smp_switch(0);
			return;
		}
		msleep(100);
	}
 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#else /* ... !CONFIG_HOTPLUG_CPU */
/* Without CPU hotplug, offlining is simply not supported. */
int __cpu_disable(void)
{
	return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

1247
/*
 * Bring a secondary CPU online.  Cold boot happened in smp_boot_cpus();
 * for hotplug re-onlining we warm-boot the cpu first.  Releases the AP
 * via smp_commenced_mask, checks TSC sync against it, and busy-waits
 * until it appears in cpu_online_map.  Returns 0 on success, -EIO if
 * the cpu never checked in.
 */
int __cpuinit native_cpu_up(unsigned int cpu)
{
	unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
	int ret = 0;

	/*
	 * We do warm boot only on cpus that had booted earlier
	 * Otherwise cold boot is all handled from smp_boot_cpus().
	 * cpu_callin_map is set during AP kickstart process. Its reset
	 * when a cpu is taken offline from cpu_exit_clear().
	 */
	if (!cpu_isset(cpu, cpu_callin_map))
		ret = __smp_prepare_cpu(cpu);

	if (ret)
		return -EIO;
#endif

	/* In case one didn't come up */
	if (!cpu_isset(cpu, cpu_callin_map)) {
		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
		return -EIO;
	}

	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
	/* Unleash the CPU! */
	cpu_set(cpu, smp_commenced_mask);

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	while (!cpu_isset(cpu, cpu_online_map)) {
		cpu_relax();
		touch_nmi_watchdog();
	}

	return 0;
}

1292
/*
 * Final SMP bringup fixups: route IO-APIC interrupts, drop the low
 * identity mappings, and (when hotplug is off, so the trampoline is
 * never needed again) make the trampoline page non-executable.
 */
void __init native_smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif
	zap_low_mappings();
#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Disable executability of the SMP trampoline:
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
#endif
}

/*
 * Install the interrupt gates for the SMP IPI vectors.
 */
void __init smp_intr_init(void)
{
	/*
	 * IRQ0 must be given a fixed assignment and initialized,
	 * because it's used before the IO-APIC is set up.
	 */
	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);

	/*
	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
	 * IPI, driven by wakeup.
	 */
	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);

	/* IPI for invalidation */
	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);

	/* IPI for generic function call */
	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}
1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338

/*
 * If the BIOS enumerates physical processors before logical,
 * maxcpus=N at enumeration-time can be used to disable HT.
 */
static int __init parse_maxcpus(char *arg)
{
	extern unsigned int maxcpus;

	/*
	 * early_param handlers may be called with arg == NULL when the
	 * option is given without a value ("maxcpus" alone); guard it so
	 * simple_strtoul() doesn't dereference NULL during early boot.
	 */
	if (arg)
		maxcpus = simple_strtoul(arg, NULL, 0);
	return 0;
}
early_param("maxcpus", parse_maxcpus);