/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *	Copyright 2001 Andi Kleen, SuSE Labs.
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *	Andi Kleen		:	Changed for SMP boot into long mode.
 *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 *      Andi Kleen              :       Converted to new state machine.
 *					Various cleanups.
 *					Probably mostly hotplug CPU ready now.
 */

#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>

60 61 62 63 64
/* Change for real CPU hotplug. Note other files need to be fixed
   first too. */
#define __cpuinit __init
#define __cpuinitdata __initdata

L
Linus Torvalds 已提交
65 66 67 68
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
69
u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
L
Linus Torvalds 已提交
70
EXPORT_SYMBOL(phys_proc_id);
71
EXPORT_SYMBOL(cpu_core_id);
L
Linus Torvalds 已提交
72 73 74 75

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;

76 77 78 79 80 81
EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
L
Linus Torvalds 已提交
82 83
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
84 85 86

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
L
Linus Torvalds 已提交
87 88 89 90

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

91 92 93
/* Set when the idlers are all forked */
int smp_threads_ready;

L
Linus Torvalds 已提交
94
cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
95
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
L
Linus Torvalds 已提交
96 97 98 99 100

/*
 * Trampoline 80x86 program as an array.
 */

101 102
extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];
L
Linus Torvalds 已提交
103 104 105 106 107 108 109

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

110
static unsigned long __cpuinit setup_trampoline(void)
L
Linus Torvalds 已提交
111 112 113 114 115 116 117 118 119 120 121
{
	void *tramp = __va(SMP_TRAMPOLINE_BASE); 
	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(tramp);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

122
static void __cpuinit smp_store_cpu_info(int id)
L
Linus Torvalds 已提交
123 124 125 126 127 128 129 130
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	identify_cpu(c);
}

/*
131
 * Synchronize TSCs of CPUs
L
Linus Torvalds 已提交
132
 *
133 134 135
 * This new algorithm is less accurate than the old "zero TSCs"
 * one, but we cannot zero TSCs anymore in the new hotplug CPU
 * model.
L
Linus Torvalds 已提交
136 137
 */

138 139 140
static atomic_t __cpuinitdata tsc_flag;
static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
static unsigned long long __cpuinitdata bp_tsc, ap_tsc;
L
Linus Torvalds 已提交
141 142 143

#define NR_LOOPS 5

144
/*
 * BP side: start (init != 0) or abandon (init == 0) a sync round by
 * taking or dropping tsc_sync_lock, then reset the handshake counter.
 */
static void __cpuinit sync_tsc_bp_init(int init)
{
	if (!init)
		_raw_spin_unlock(&tsc_sync_lock);
	else
		_raw_spin_lock(&tsc_sync_lock);

	atomic_set(&tsc_flag, 0);
}

/*
 * Synchronize TSC on AP with BP (one round, running on the AP).
 */
static void __cpuinit __sync_tsc_ap(void)
{
	if (!cpu_has_tsc)
		return;
	Dprintk("AP %d syncing TSC\n", smp_processor_id());

	/* Wait until the BP has reset the handshake counter. */
	while (atomic_read(&tsc_flag) != 0) {
		cpu_relax();
	}

	/* First increment: tell the BP we are ready. */
	atomic_inc(&tsc_flag);
	mb();

	/*
	 * The BP holds tsc_sync_lock until it has sampled bp_tsc, so
	 * taking the lock here waits for that sample before loading it.
	 */
	_raw_spin_lock(&tsc_sync_lock);
	wrmsrl(MSR_IA32_TSC, bp_tsc);
	_raw_spin_unlock(&tsc_sync_lock);

	rdtscll(ap_tsc);
	mb();

	/* Second increment: let the BP finish this round. */
	atomic_inc(&tsc_flag);
	mb();
}

/*
 * AP side: run all NR_LOOPS synchronization rounds.
 */
static void __cpuinit sync_tsc_ap(void)
{
	int rounds_left = NR_LOOPS;

	while (rounds_left-- > 0)
		__sync_tsc_ap();
}

/*
 * Synchronize TSC from BP to AP (one round, running on the BP).
 *
 * Expects tsc_sync_lock to be held on entry (see sync_tsc_bp_init());
 * the lock is released here once bp_tsc has been sampled.
 *
 * @cpu: logical number of the AP; not used by this function.
 */
static void __cpuinit __sync_tsc_bp(int cpu)
{
	if (!cpu_has_tsc)
		return;

	/* Wait for AP */
	while (atomic_read(&tsc_flag) == 0)
		cpu_relax();
	/* Save BPs TSC */
	sync_core();
	rdtscll(bp_tsc);
	/* Don't do the sync core here to avoid too much latency. */
	mb();
	/* Start the AP */
	_raw_spin_unlock(&tsc_sync_lock);
	/* Wait for AP again */
	while (atomic_read(&tsc_flag) < 2)
		cpu_relax();
	/*
	 * Take the full 64-bit sample: bp_tsc is compared against the
	 * 64-bit ap_tsc by the caller, so a 32-bit rdtscl() here would
	 * make the reported difference meaningless.
	 */
	rdtscll(bp_tsc);
	barrier();
}

/*
 * BP side: run NR_LOOPS synchronization rounds against 'cpu', re-arming
 * the lock and handshake counter between rounds, then report the
 * remaining difference between the two TSC samples.
 */
static void __cpuinit sync_tsc_bp(int cpu)
{
	int round;

	for (round = 1; round < NR_LOOPS; round++) {
		__sync_tsc_bp(cpu);
		sync_tsc_bp_init(1);
	}

	/* Final round: nothing to re-arm afterwards. */
	__sync_tsc_bp(cpu);

	printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
	       cpu, ap_tsc - bp_tsc);
}

/*
 * Cleared by do_boot_cpu() before each wakeup and set by
 * wakeup_secondary_via_INIT() after the INIT de-assert IPI has been
 * sent; smp_callin() spins on it before touching its local APIC.
 */
static atomic_t init_deasserted __cpuinitdata;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 *
 * Waits for the INIT de-assert and for the BP to set our bit in
 * cpu_callout_map, then sets up the local APIC, calibrates the delay
 * loop, stores the CPU parameters and finally sets our bit in
 * cpu_callin_map.  Panics if this CPU already called in or if no
 * callout arrives within the timeout.
 */
void __cpuinit smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC.  We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	while (!atomic_read(&init_deasserted))
		cpu_relax();

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		cpu_relax();
	}

	/*
	 * Decide on the callout bit itself rather than on the clock:
	 * a callout that is seen just as the deadline passes must not
	 * be reported as a missing callout.
	 */
	if (!cpu_isset(cpuid, cpu_callout_map)) {
		panic("smp_callin: CPU%d started up but did not get a callout!\n",
			cpuid);
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	setup_local_APIC();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n",&cpuid);

	disable_APIC_timer();

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}

/*
306
 * Setup code on secondary processor (after comming out of the trampoline)
L
Linus Torvalds 已提交
307
 */
308
void __cpuinit start_secondary(void)
L
Linus Torvalds 已提交
309 310 311 312 313 314 315 316 317
{
	/*
	 * Dont put anything before smp_callin(), SMP
	 * booting is too fragile that we want to limit the
	 * things done here to the most necessary things.
	 */
	cpu_init();
	smp_callin();

318 319 320 321 322
	/*
	 * Synchronize the TSC with the BP
	 */
	sync_tsc_ap();

L
Linus Torvalds 已提交
323 324 325 326 327 328
	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
	barrier();

	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); 	
	setup_secondary_APIC_clock();

329
	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
L
Linus Torvalds 已提交
330 331 332 333 334 335 336 337

	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}


338
	enable_APIC_timer();
L
Linus Torvalds 已提交
339 340

	/*
341
	 * Allow the master to continue.
L
Linus Torvalds 已提交
342 343
	 */
	cpu_set(smp_processor_id(), cpu_online_map);
344 345
	mb();

L
Linus Torvalds 已提交
346 347 348
	cpu_idle();
}

349
/*
 * Initial stack pointer and entry function for the AP being started;
 * both are written by do_boot_cpu() before the wakeup IPIs are sent.
 */
extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#if APIC_DEBUG
/*
 * Debug aid: issue remote-read requests for the ID, VERSION and SPIV
 * registers of the APIC 'apicid' and print each value, or "failed" if
 * the read did not come back valid.
 */
static void inquire_remote_apic(int apicid)
{
	unsigned idx, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int polls, status;

	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

	for (idx = 0; idx < sizeof(regs) / sizeof(*regs); idx++) {
		printk("... APIC #%d %s: ", apicid, names[idx]);

		/* The ICR must be idle before a new command is written. */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[idx]);

		/* Poll every 100us while the remote read is in progress. */
		for (polls = 0; ; polls++) {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
			if (status != APIC_ICR_RR_INPROG || polls >= 1000)
				break;
		}

		if (status == APIC_ICR_RR_VALID) {
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
		} else {
			printk("failed\n");
		}
	}
}
#endif

/*
 * Kick the secondary to wake up.
 */
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
L
Linus Torvalds 已提交
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n",j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_rip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk(KERN_ERR "APIC never delivered???\n");
	if (accept_status)
		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}

516 517 518 519
/*
 * Boot one CPU.
 *
 * Forks an idle task for 'cpu', installs the trampoline, points the
 * warm reset vector at it, wakes the APIC 'apicid' and waits for the
 * AP to show up in cpu_callin_map.
 *
 * Returns 0 on success, the fork_idle() error if no idle task could
 * be created, or -EIO if the AP did not come up (in which case the
 * CPU is removed from the callout/present/possible maps again).
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
	struct task_struct *idle_task;
	unsigned long start_rip;
	unsigned long failed;
	int polls_left;

	/*
	 * We can't use kernel_thread since we must avoid to
	 * reschedule the child.
	 */
	idle_task = fork_idle(cpu);
	if (IS_ERR(idle_task)) {
		printk("failed fork for CPU %d\n", cpu);
		return PTR_ERR(idle_task);
	}
	x86_cpu_to_apicid[cpu] = apicid;

	cpu_pda[cpu].pcurrent = idle_task;

	start_rip = setup_trampoline();

	/* Hand the AP its stack and its C entry point. */
	init_rsp = idle_task->thread.rsp;
	per_cpu(init_tss,cpu).rsp0 = init_rsp;
	initial_code = start_secondary;
	clear_ti_thread_flag(idle_task->thread_info, TIF_FORK);

	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
	       start_rip, init_rsp);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	Dprintk("1.\n");
	/* Warm reset vector: 16-bit value at 0x469, then at 0x467. */
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
	Dprintk("2.\n");
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
	Dprintk("3.\n");

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	/*
	 * Starting actual IPI sequence...
	 */
	failed = wakeup_secondary_via_INIT(apicid, start_rip);

	if (!failed) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response (50000 polls, 100us apart)
		 */
		for (polls_left = 50000; polls_left > 0; polls_left--) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (!cpu_isset(cpu, cpu_callin_map)) {
			failed = 1;
			if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
#if APIC_DEBUG
			inquire_remote_apic(apicid);
#endif
		} else {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		}
	}

	if (!failed)
		return 0;

	/* Undo everything that marked this CPU as usable. */
	cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
	clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
	cpu_clear(cpu, cpu_present_map);
	cpu_clear(cpu, cpu_possible_map);
	x86_cpu_to_apicid[cpu] = BAD_APICID;
	x86_cpu_to_log_apicid[cpu] = BAD_APICID;
	return -EIO;
}

/*
 * Defined here for use elsewhere; neither variable is read or written
 * in the visible part of this file.
 */
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
 * on SMT systems efficiently.
 */
static __cpuinit void detect_siblings(void)
L
Linus Torvalds 已提交
641
{
642
	int cpu;
L
Linus Torvalds 已提交
643

644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	for_each_online_cpu (cpu) {
		struct cpuinfo_x86 *c = cpu_data + cpu;
		int siblings = 0;
		int i;
		if (smp_num_siblings > 1) {
			for_each_online_cpu (i) {
				if (cpu_core_id[cpu] == phys_proc_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		if (siblings != smp_num_siblings) {
			printk(KERN_WARNING
	       "WARNING: %d siblings found for CPU%d, should be %d\n",
			       siblings, cpu, smp_num_siblings);
			smp_num_siblings = siblings;
L
Linus Torvalds 已提交
670
		}
671 672 673 674 675 676 677
		if (c->x86_num_cores > 1) {
			for_each_online_cpu(i) {
				if (phys_proc_id[cpu] == phys_proc_id[i])
					cpu_set(i, cpu_core_map[cpu]);
			}
		} else
			cpu_core_map[cpu] = cpu_sibling_map[cpu];
L
Linus Torvalds 已提交
678 679 680 681
	}
}

/*
682
 * Cleanup possible dangling ends...
L
Linus Torvalds 已提交
683
 */
684
static __cpuinit void smp_cleanup_boot(void)
L
Linus Torvalds 已提交
685
{
686 687 688 689 690
	/*
	 * Paranoid:  Set warm reset code and vector here back
	 * to default values.
	 */
	CMOS_WRITE(0, 0xf);
L
Linus Torvalds 已提交
691

692 693 694 695
	/*
	 * Reset trampoline flag
	 */
	*((volatile int *) phys_to_virt(0x467)) = 0;
L
Linus Torvalds 已提交
696

697
#ifndef CONFIG_HOTPLUG_CPU
L
Linus Torvalds 已提交
698
	/*
699 700 701
	 * Free pages reserved for SMP bootup.
	 * When you add hotplug CPU support later remove this
	 * Note there is more work to be done for later CPU bootup.
L
Linus Torvalds 已提交
702 703
	 */

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
	free_page((unsigned long) __va(PAGE_SIZE));
	free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
#endif
}

/*
 * Fall back to non SMP mode after errors: only CPU 0 stays present
 * and possible, and it becomes its own sibling and core mate.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __cpuinit void disable_smp(void)
{
	/* With an MP configuration keep the boot CPU's APIC id, else id 0. */
	int boot_physid = smp_found_config ? boot_cpu_id : 0;

	cpu_present_map = cpumask_of_cpu(0);
	cpu_possible_map = cpumask_of_cpu(0);
	phys_cpu_present_map = physid_mask_of_physid(boot_physid);

	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);
}

/*
 * Handle user cpus=... parameter: keep the first max_cpus possible
 * CPUs and drop every further one from the possible and present maps.
 */
static __cpuinit void enforce_max_cpus(unsigned max_cpus)
{
	unsigned seen = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (cpu_possible(cpu) && ++seen > max_cpus) {
			cpu_clear(cpu, cpu_possible_map);
			cpu_clear(cpu, cpu_present_map);
		}
	}
}

/*
 * Various sanity checks before going SMP.
 *
 * Returns 0 if SMP bootup may proceed, -1 if the system has to stay
 * uniprocessor (no MP configuration, no local APIC on the boot CPU,
 * or max_cpus == 0).
 */
static int __cpuinit smp_sanity_check(unsigned max_cpus)
{
	/* The CPU we are running on must be in the physical present map. */
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
		       hard_smp_processor_id());
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * Without an SMP configuration found at boot time there is
	 * nothing more to do here.
	 */
	if (!smp_found_config) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		disable_smp();
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		return -1;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 *
	 * NOTE(review): the test uses boot_cpu_id while the bit that gets
	 * set is hard_smp_processor_id() - confirm the two always agree.
	 */
	if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
		printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
								 boot_cpu_id);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/* An integrated APIC is announced but the CPU does not have one. */
	if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_id);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		nr_ioapics = 0;
		return -1;
	}

	/* max_cpus == 0: SMP was disabled on request. */
	if (!max_cpus) {
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		nr_ioapics = 0;
		return -1;
	}

	return 0;
}

/*
 * Prepare for SMP bootup.  The MP table or ACPI has been read
 * earlier.  Just do some sanity checking here and enable APIC mode.
 *
 * If the sanity checks fail the system is left in non-SMP mode and
 * none of the APIC setup below is performed.
 */
void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
{
	int cpu;

	nmi_watchdog_default();
	current_cpu_data = boot_cpu_data;
	current_thread_info()->cpu = 0;  /* needed? */

	enforce_max_cpus(max_cpus);

	/*
	 * Fill in cpu_present_map (and cpu_possible_map) from the
	 * physical present map.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		int apicid = cpu_present_to_apicid(cpu);

		if (!physid_isset(apicid, phys_cpu_present_map))
			continue;

		cpu_set(cpu, cpu_present_map);
		/* possible map would be different if we supported real
		   CPU hotplug. */
		cpu_set(cpu, cpu_possible_map);
	}

	if (smp_sanity_check(max_cpus) < 0) {
		printk(KERN_INFO "SMP disabled\n");
		disable_smp();
		return;
	}

	/* Switch from PIC to APIC mode. */
	connect_bsp_APIC();
	setup_local_APIC();

	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
		/* Or can we switch back to PIC here? */
	}
	x86_cpu_to_apicid[0] = boot_cpu_id;

	/* Now start the IO-APICs, unless there are none or setup is skipped. */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
	else
		nr_ioapics = 0;

	/* Set up local APIC timer on boot CPU. */
	setup_boot_APIC_clock();
}

/*
 * Early setup to make printk work: mark the CPU we are running on
 * as online and as called out.
 */
void __init smp_prepare_boot_cpu(void)
{
	int this_cpu = smp_processor_id();

	cpu_set(this_cpu, cpu_online_map);
	cpu_set(this_cpu, cpu_callout_map);
}

/*
 * Entry point to boot a CPU.
 *
 * Returns 0 once 'cpu' has marked itself online, -EINVAL if the CPU
 * has no usable APIC id (or is the boot CPU), or the error returned
 * by do_boot_cpu().
 *
 * This is all __cpuinit, not __devinit for now because we don't support
 * CPU hotplug (yet).
 */
int __cpuinit __cpu_up(unsigned int cpu)
{
	int apicid = cpu_present_to_apicid(cpu);
	int err;

	WARN_ON(irqs_disabled());

	Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);

	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
	    !physid_isset(apicid, phys_cpu_present_map)) {
		printk("__cpu_up: bad cpu %d\n", cpu);
		return -EINVAL;
	}

	/* Take the TSC sync lock before the AP can start running. */
	sync_tsc_bp_init(1);

	/* Boot it! */
	err = do_boot_cpu(cpu, apicid);
	if (err < 0) {
		/* No AP to sync with: drop the TSC sync lock again. */
		sync_tsc_bp_init(0);
		Dprintk("do_boot_cpu failed %d\n", err);
		return err;
	}

	sync_tsc_bp(cpu);

	/* Unleash the CPU! */
	Dprintk("waiting for cpu %d\n", cpu);

	/* start_secondary() sets the online bit as its last setup step. */
	while (!cpu_isset(cpu, cpu_online_map)) {
		cpu_relax();
	}

	return 0;
}

/*
 * Finish the SMP boot: drop the low mappings, undo the boot-time
 * warm reset / trampoline setup, route the IO-APIC interrupts (when
 * configured), build the sibling maps and initialize gettimeofday
 * timekeeping.
 *
 * @max_cpus: not used by this function.
 */
void __cpuinit smp_cpus_done(unsigned int max_cpus)
{
	zap_low_mappings();
	smp_cleanup_boot();

#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif

	detect_siblings();
	time_init_gtod();
}