smp.c 23.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
21
#include <linux/export.h>
22
#include <linux/sched/mm.h>
23
#include <linux/sched/topology.h>
L
Linus Torvalds 已提交
24 25 26 27 28 29 30
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
31
#include <linux/device.h>
L
Linus Torvalds 已提交
32 33
#include <linux/cpu.h>
#include <linux/notifier.h>
34
#include <linux/topology.h>
35
#include <linux/profile.h>
L
Linus Torvalds 已提交
36 37

#include <asm/ptrace.h>
A
Arun Sharma 已提交
38
#include <linux/atomic.h>
L
Linus Torvalds 已提交
39
#include <asm/irq.h>
40
#include <asm/hw_irq.h>
41
#include <asm/kvm_ppc.h>
42
#include <asm/dbell.h>
L
Linus Torvalds 已提交
43 44 45 46 47 48
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
49
#include <asm/cputhreads.h>
L
Linus Torvalds 已提交
50
#include <asm/cputable.h>
51
#include <asm/mpic.h>
52
#include <asm/vdso_datapage.h>
P
Paul Mackerras 已提交
53 54 55
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
56
#include <asm/vdso.h>
57
#include <asm/debug.h>
58
#include <asm/kexec.h>
59
#include <asm/asm-prototypes.h>
60
#include <asm/cpu_has_feature.h>
P
Paul Mackerras 已提交
61

L
Linus Torvalds 已提交
62
#ifdef DEBUG
63
#include <asm/udbg.h>
L
Linus Torvalds 已提交
64 65 66 67 68
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

69
#ifdef CONFIG_HOTPLUG_CPU
70 71
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
72 73
#endif

74 75
struct thread_info *secondary_ti;

76 77
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
L
Linus Torvalds 已提交
78

79
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
80
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
L
Linus Torvalds 已提交
81

P
Paul Mackerras 已提交
82
/* SMP operations for this machine */
L
Linus Torvalds 已提交
83 84
struct smp_ops_t *smp_ops;

85 86
/* Can't be static due to PowerMac hackery */
volatile unsigned int cpu_callin_map[NR_CPUS];
L
Linus Torvalds 已提交
87 88 89

int smt_enabled_at_boot = 1;

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
/*
 * Returns 1 if the specified cpu should be brought up during boot.
 * Used to inhibit booting threads if they've been disabled or
 * limited on the command line
 */
int smp_generic_cpu_bootable(unsigned int nr)
{
	/* Special case - we inhibit secondary thread startup
	 * during boot if the user requests it.
	 */
	if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
			return 0;
		if (smt_enabled_at_boot
		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
			return 0;
	}

	return 1;
}


P
Paul Mackerras 已提交
112
#ifdef CONFIG_PPC64
113
int smp_generic_kick_cpu(int nr)
L
Linus Torvalds 已提交
114 115 116 117 118 119 120 121
{
	BUG_ON(nr < 0 || nr >= NR_CPUS);

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero After we set cpu_start,
	 * the processor will continue on to secondary_start
	 */
122 123 124 125 126 127 128 129 130 131 132
	if (!paca[nr].cpu_start) {
		paca[nr].cpu_start = 1;
		smp_mb();
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Ok it's not there, so it might be soft-unplugged, let's
	 * try to bring it back
	 */
133
	generic_set_cpu_up(nr);
134 135 136
	smp_wmb();
	smp_send_reschedule(nr);
#endif /* CONFIG_HOTPLUG_CPU */
137 138

	return 0;
L
Linus Torvalds 已提交
139
}
140
#endif /* CONFIG_PPC64 */
L
Linus Torvalds 已提交
141

142 143 144 145 146 147 148 149
static irqreturn_t call_function_action(int irq, void *data)
{
	generic_smp_call_function_interrupt();
	return IRQ_HANDLED;
}

static irqreturn_t reschedule_action(int irq, void *data)
{
150
	scheduler_ipi();
151 152 153
	return IRQ_HANDLED;
}

154
static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
155
{
156
	tick_broadcast_ipi_handler();
157 158 159
	return IRQ_HANDLED;
}

160 161
#ifdef CONFIG_NMI_IPI
static irqreturn_t nmi_ipi_action(int irq, void *data)
162
{
163
	smp_handle_nmi_ipi(get_irq_regs());
164 165
	return IRQ_HANDLED;
}
166
#endif
167 168 169 170

static irq_handler_t smp_ipi_action[] = {
	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
	[PPC_MSG_RESCHEDULE] = reschedule_action,
171
	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
172 173 174
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
#endif
175 176
};

177 178 179 180 181
/*
 * The NMI IPI is a fallback and not truly non-maskable. It is simpler
 * than going through the call function infrastructure, and strongly
 * serialized, so it is more appropriate for debugging.
 */
182 183 184
const char *smp_ipi_name[] = {
	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
185
	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
186
	[PPC_MSG_NMI_IPI] = "nmi ipi",
187 188 189 190 191 192 193
};

/* optional function to request ipi, for controllers with >= 4 ipis */
int smp_request_message_ipi(int virq, int msg)
{
	int err;

194
	if (msg < 0 || msg > PPC_MSG_NMI_IPI)
195
		return -EINVAL;
196 197
#ifndef CONFIG_NMI_IPI
	if (msg == PPC_MSG_NMI_IPI)
198 199
		return 1;
#endif
200

201
	err = request_irq(virq, smp_ipi_action[msg],
202
			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
203
			  smp_ipi_name[msg], NULL);
204 205 206 207 208 209
	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
		virq, smp_ipi_name[msg], err);

	return err;
}

210
#ifdef CONFIG_PPC_SMP_MUXED_IPI
211
struct cpu_messages {
212
	long messages;			/* current messages */
213 214 215
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);

216
void smp_muxed_ipi_set_message(int cpu, int msg)
217 218
{
	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
219
	char *message = (char *)&info->messages;
220

221 222 223 224
	/*
	 * Order previous accesses before accesses in the IPI handler.
	 */
	smp_mb();
225
	message[msg] = 1;
226 227 228 229 230
}

void smp_muxed_ipi_message_pass(int cpu, int msg)
{
	smp_muxed_ipi_set_message(cpu, msg);
231

232 233 234 235
	/*
	 * cause_ipi functions are required to include a full barrier
	 * before doing whatever causes the IPI.
	 */
236
	smp_ops->cause_ipi(cpu);
237 238
}

239
#ifdef __BIG_ENDIAN__
240
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
241
#else
242
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
243 244
#endif

245
irqreturn_t smp_ipi_demux(void)
246 247 248 249 250 251 252 253
{
	mb();	/* order any irq clear */

	return smp_ipi_demux_relaxed();
}

/* sync-free variant. Callers should ensure synchronization */
irqreturn_t smp_ipi_demux_relaxed(void)
254
{
255
	struct cpu_messages *info;
256
	unsigned long all;
257

258
	info = this_cpu_ptr(&ipi_message);
259
	do {
260
		all = xchg(&info->messages, 0);
261 262 263 264 265 266 267 268 269 270 271
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
		/*
		 * Must check for PPC_MSG_RM_HOST_ACTION messages
		 * before PPC_MSG_CALL_FUNCTION messages because when
		 * a VM is destroyed, we call kick_all_cpus_sync()
		 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
		 * messages have completed before we free any VCPUs.
		 */
		if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
			kvmppc_xics_ipi_action();
#endif
272
		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
273
			generic_smp_call_function_interrupt();
274
		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
275
			scheduler_ipi();
276 277
		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
			tick_broadcast_ipi_handler();
278 279 280 281
#ifdef CONFIG_NMI_IPI
		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
			nmi_ipi_action(0, NULL);
#endif
282 283
	} while (info->messages);

284 285
	return IRQ_HANDLED;
}
286
#endif /* CONFIG_PPC_SMP_MUXED_IPI */
287

288 289 290 291 292 293 294 295 296 297
static inline void do_message_pass(int cpu, int msg)
{
	if (smp_ops->message_pass)
		smp_ops->message_pass(cpu, msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	else
		smp_muxed_ipi_message_pass(cpu, msg);
#endif
}

L
Linus Torvalds 已提交
298 299
void smp_send_reschedule(int cpu)
{
300
	if (likely(smp_ops))
301
		do_message_pass(cpu, PPC_MSG_RESCHEDULE);
L
Linus Torvalds 已提交
302
}
303
EXPORT_SYMBOL_GPL(smp_send_reschedule);
L
Linus Torvalds 已提交
304

305 306
void arch_send_call_function_single_ipi(int cpu)
{
307
	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
308 309
}

310
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
311 312 313
{
	unsigned int cpu;

314
	for_each_cpu(cpu, mask)
315
		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
316 317
}

318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
#ifdef CONFIG_NMI_IPI

/*
 * "NMI IPI" system.
 *
 * NMI IPIs may not be recoverable, so should not be used as ongoing part of
 * a running system. They can be used for crash, debug, halt/reboot, etc.
 *
 * NMI IPIs are globally single threaded. No more than one in progress at
 * any time.
 *
 * The IPI call waits with interrupts disabled until all targets enter the
 * NMI handler, then the call returns.
 *
 * No new NMI can be initiated until targets exit the handler.
 *
 * The IPI call may time out without all targets entering the NMI handler.
 * In that case, there is some logic to recover (and ignore subsequent
 * NMI interrupts that may eventually be raised), but the platform interrupt
 * handler may not be able to distinguish this from other exception causes,
 * which may cause a crash.
 */

static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
static int nmi_ipi_busy_count = 0;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;

static void nmi_ipi_lock_start(unsigned long *flags)
{
	raw_local_irq_save(*flags);
	hard_irq_disable();
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
		raw_local_irq_restore(*flags);
		cpu_relax();
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}

static void nmi_ipi_lock(void)
{
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
		cpu_relax();
}

static void nmi_ipi_unlock(void)
{
	smp_mb();
	WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
	atomic_set(&__nmi_ipi_lock, 0);
}

static void nmi_ipi_unlock_end(unsigned long *flags)
{
	nmi_ipi_unlock();
	raw_local_irq_restore(*flags);
}

/*
 * Platform NMI handler calls this to ack
 */
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
	void (*fn)(struct pt_regs *);
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 0;

	/*
	 * Unexpected NMIs are possible here because the interrupt may not
	 * be able to distinguish NMI IPIs from other types of NMIs, or
	 * because the caller may have timed out.
	 */
	nmi_ipi_lock_start(&flags);
	if (!nmi_ipi_busy_count)
		goto out;
	if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
		goto out;

	fn = nmi_ipi_function;
	if (!fn)
		goto out;

	cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	nmi_ipi_busy_count++;
	nmi_ipi_unlock();

	ret = 1;

	fn(regs);

	nmi_ipi_lock();
	nmi_ipi_busy_count--;
out:
	nmi_ipi_unlock_end(&flags);

	return ret;
}

static void do_smp_send_nmi_ipi(int cpu)
{
420 421 422
	if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
		return;

423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
	if (cpu >= 0) {
		do_message_pass(cpu, PPC_MSG_NMI_IPI);
	} else {
		int c;

		for_each_online_cpu(c) {
			if (c == raw_smp_processor_id())
				continue;
			do_message_pass(c, PPC_MSG_NMI_IPI);
		}
	}
}

/*
 * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
 * - fn is the target callback function.
 * - delay_us > 0 is the delay before giving up waiting for targets to
 *   enter the handler, == 0 specifies indefinite delay.
 */
static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 1;

	BUG_ON(cpu == me);
	BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);

	if (unlikely(!smp_ops))
		return 0;

	/* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
	nmi_ipi_lock_start(&flags);
	while (nmi_ipi_busy_count) {
		nmi_ipi_unlock_end(&flags);
		cpu_relax();
		nmi_ipi_lock_start(&flags);
	}

	nmi_ipi_function = fn;

	if (cpu < 0) {
		/* ALL_OTHERS */
		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	} else {
		/* cpumask starts clear */
		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
	}
	nmi_ipi_busy_count++;
	nmi_ipi_unlock();

	do_smp_send_nmi_ipi(cpu);

	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
		udelay(1);
		if (delay_us) {
			delay_us--;
			if (!delay_us)
				break;
		}
	}

	nmi_ipi_lock();
	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
		/* Could not gather all CPUs */
		ret = 0;
		cpumask_clear(&nmi_ipi_pending_mask);
	}
	nmi_ipi_busy_count--;
	nmi_ipi_unlock_end(&flags);

	return ret;
}
#endif /* CONFIG_NMI_IPI */

499 500 501 502 503 504 505 506 507 508
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
}
#endif

509 510
#ifdef CONFIG_DEBUGGER
void debugger_ipi_callback(struct pt_regs *regs)
L
Linus Torvalds 已提交
511
{
512 513
	debugger_ipi(regs);
}
514

515 516 517
void smp_send_debugger_break(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
L
Linus Torvalds 已提交
518 519 520
}
#endif

521
#ifdef CONFIG_KEXEC_CORE
522 523
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
524
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
525 526 527
}
#endif

L
Linus Torvalds 已提交
528 529
static void stop_this_cpu(void *dummy)
{
530 531 532
	/* Remove this CPU */
	set_cpu_online(smp_processor_id(), false);

L
Linus Torvalds 已提交
533 534 535 536 537
	local_irq_disable();
	while (1)
		;
}

538 539
void smp_send_stop(void)
{
540
	smp_call_function(stop_this_cpu, NULL, 0);
L
Linus Torvalds 已提交
541 542 543 544
}

struct thread_info *current_set[NR_CPUS];

545
static void smp_store_cpu_info(int id)
L
Linus Torvalds 已提交
546
{
547
	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
548 549 550 551
#ifdef CONFIG_PPC_FSL_BOOK3E
	per_cpu(next_tlbcam_idx, id)
		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
L
Linus Torvalds 已提交
552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;

	DBG("smp_prepare_cpus\n");

	/* 
	 * setup_cpu may need to be called on the boot cpu. We havent
	 * spun any cpus up but lets be paranoid.
	 */
	BUG_ON(boot_cpuid != smp_processor_id());

	/* Fixup boot cpu */
	smp_store_cpu_info(boot_cpuid);
	cpu_callin_map[boot_cpuid] = 1;

570 571 572 573 574
	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
575 576 577
		/*
		 * numa_node_id() works after this.
		 */
578 579 580 581 582
		if (cpu_present(cpu)) {
			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
			set_cpu_numa_mem(cpu,
				local_memory_node(numa_cpu_lookup_table[cpu]));
		}
583 584 585 586 587
	}

	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

588 589
	if (smp_ops && smp_ops->probe)
		smp_ops->probe();
L
Linus Torvalds 已提交
590 591
}

592
void smp_prepare_boot_cpu(void)
L
Linus Torvalds 已提交
593 594
{
	BUG_ON(smp_processor_id() != boot_cpuid);
P
Paul Mackerras 已提交
595
#ifdef CONFIG_PPC64
L
Linus Torvalds 已提交
596
	paca[boot_cpuid].__current = current;
P
Paul Mackerras 已提交
597
#endif
598
	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
A
Al Viro 已提交
599
	current_set[boot_cpuid] = task_thread_info(current);
L
Linus Torvalds 已提交
600 601 602 603 604 605 606 607 608 609 610
}

#ifdef CONFIG_HOTPLUG_CPU

int generic_cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();

	if (cpu == boot_cpuid)
		return -EBUSY;

611
	set_cpu_online(cpu, false);
612
#ifdef CONFIG_PPC64
613
	vdso_data->processorCount--;
614
#endif
615 616 617
	/* Update affinity of all IRQs previously aimed at this CPU */
	irq_migrate_all_off_this_cpu();

618 619 620 621 622 623 624 625
	/*
	 * Depending on the details of the interrupt controller, it's possible
	 * that one of the interrupts we just migrated away from this CPU is
	 * actually already pending on this CPU. If we leave it in that state
	 * the interrupt will never be EOI'ed, and will never fire again. So
	 * temporarily enable interrupts here, to allow any pending interrupt to
	 * be received (and EOI'ed), before we take this CPU offline.
	 */
626 627 628 629
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

L
Linus Torvalds 已提交
630 631 632 633 634 635 636 637
	return 0;
}

void generic_cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
638
		smp_rmb();
639
		if (is_cpu_dead(cpu))
L
Linus Torvalds 已提交
640 641 642 643 644 645
			return;
		msleep(100);
	}
	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
}

646 647 648 649
void generic_set_cpu_dead(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_DEAD;
}
650

651 652 653 654 655 656 657 658 659 660
/*
 * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
 * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
 * which makes the delay in generic_cpu_die() not happen.
 */
void generic_set_cpu_up(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
}

661 662 663 664
int generic_check_cpu_restart(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}
665

666 667 668 669 670
int is_cpu_dead(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_DEAD;
}

671
static bool secondaries_inhibited(void)
672
{
673
	return kvm_hv_mode_active();
674 675 676 677 678 679
}

#else /* HOTPLUG_CPU */

#define secondaries_inhibited()		0

L
Linus Torvalds 已提交
680 681
#endif

682
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
683
{
684
	struct thread_info *ti = task_thread_info(idle);
685 686

#ifdef CONFIG_PPC64
687
	paca[cpu].__current = idle;
688 689 690
	paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
	ti->cpu = cpu;
691
	secondary_ti = current_set[cpu] = ti;
692 693
}

694
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
L
Linus Torvalds 已提交
695
{
696
	int rc, c;
L
Linus Torvalds 已提交
697

698 699 700 701
	/*
	 * Don't allow secondary threads to come online if inhibited
	 */
	if (threads_per_core > 1 && secondaries_inhibited() &&
702
	    cpu_thread_in_subcore(cpu))
703 704
		return -EBUSY;

705 706
	if (smp_ops == NULL ||
	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
L
Linus Torvalds 已提交
707 708
		return -EINVAL;

709
	cpu_idle_thread_init(cpu, tidle);
710

711 712 713 714 715 716 717 718 719 720
	/*
	 * The platform might need to allocate resources prior to bringing
	 * up the CPU
	 */
	if (smp_ops->prepare_cpu) {
		rc = smp_ops->prepare_cpu(cpu);
		if (rc)
			return rc;
	}

L
Linus Torvalds 已提交
721 722 723 724 725 726 727 728 729
	/* Make sure callin-map entry is 0 (can be leftover a CPU
	 * hotplug
	 */
	cpu_callin_map[cpu] = 0;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
730
	smp_mb();
L
Linus Torvalds 已提交
731 732 733

	/* wake up cpus */
	DBG("smp: kicking cpu %d\n", cpu);
734 735 736 737 738
	rc = smp_ops->kick_cpu(cpu);
	if (rc) {
		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
		return rc;
	}
L
Linus Torvalds 已提交
739 740 741 742 743 744 745

	/*
	 * wait to see if the cpu made a callin (is actually up).
	 * use this value that I found through experimentation.
	 * -- Cort
	 */
	if (system_state < SYSTEM_RUNNING)
746
		for (c = 50000; c && !cpu_callin_map[cpu]; c--)
L
Linus Torvalds 已提交
747 748 749 750 751 752 753
			udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
	else
		/*
		 * CPUs can take much longer to come up in the
		 * hotplug case.  Wait five seconds.
		 */
754 755
		for (c = 5000; c && !cpu_callin_map[cpu]; c--)
			msleep(1);
L
Linus Torvalds 已提交
756 757 758
#endif

	if (!cpu_callin_map[cpu]) {
759
		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
L
Linus Torvalds 已提交
760 761 762
		return -ENOENT;
	}

763
	DBG("Processor %u found.\n", cpu);
L
Linus Torvalds 已提交
764 765 766 767

	if (smp_ops->give_timebase)
		smp_ops->give_timebase();

768
	/* Wait until cpu puts itself in the online & active maps */
769
	while (!cpu_online(cpu))
L
Linus Torvalds 已提交
770 771 772 773 774
		cpu_relax();

	return 0;
}

775 776 777 778 779 780
/* Return the value of the reg property corresponding to the given
 * logical cpu.
 */
int cpu_to_core_id(int cpu)
{
	struct device_node *np;
781
	const __be32 *reg;
782 783 784 785 786 787 788 789 790 791
	int id = -1;

	np = of_get_cpu_node(cpu, NULL);
	if (!np)
		goto out;

	reg = of_get_property(np, "reg", NULL);
	if (!reg)
		goto out;

792
	id = be32_to_cpup(reg);
793 794 795 796
out:
	of_node_put(np);
	return id;
}
797
EXPORT_SYMBOL_GPL(cpu_to_core_id);
798

799 800 801 802 803 804 805 806 807 808 809 810 811
/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
	return cpu >> threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);

int cpu_first_thread_of_core(int core)
{
	return core << threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);

812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
{
	const struct cpumask *mask;
	struct device_node *np;
	int i, plen;
	const __be32 *prop;

	mask = add ? cpu_online_mask : cpu_present_mask;
	for_each_cpu(i, mask) {
		np = of_get_cpu_node(i, NULL);
		if (!np)
			continue;
		prop = of_get_property(np, "ibm,chip-id", &plen);
		if (prop && plen == sizeof(int) &&
		    of_read_number(prop, 1) == chipid) {
			if (add) {
				cpumask_set_cpu(cpu, cpu_core_mask(i));
				cpumask_set_cpu(i, cpu_core_mask(cpu));
			} else {
				cpumask_clear_cpu(cpu, cpu_core_mask(i));
				cpumask_clear_cpu(i, cpu_core_mask(cpu));
			}
		}
		of_node_put(np);
	}
}

839
/* Must be called when no change can occur to cpu_present_mask,
840 841 842 843 844
 * i.e. during cpu online or offline.
 */
static struct device_node *cpu_to_l2cache(int cpu)
{
	struct device_node *np;
845
	struct device_node *cache;
846 847 848 849 850 851 852 853

	if (!cpu_present(cpu))
		return NULL;

	np = of_get_cpu_node(cpu, NULL);
	if (np == NULL)
		return NULL;

854 855
	cache = of_find_next_cache_node(np);

856 857
	of_node_put(np);

858
	return cache;
859
}
L
Linus Torvalds 已提交
860

861 862
static void traverse_core_siblings(int cpu, bool add)
{
863
	struct device_node *l2_cache, *np;
864
	const struct cpumask *mask;
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880
	int i, chip, plen;
	const __be32 *prop;

	/* First see if we have ibm,chip-id properties in cpu nodes */
	np = of_get_cpu_node(cpu, NULL);
	if (np) {
		chip = -1;
		prop = of_get_property(np, "ibm,chip-id", &plen);
		if (prop && plen == sizeof(int))
			chip = of_read_number(prop, 1);
		of_node_put(np);
		if (chip >= 0) {
			traverse_siblings_chip_id(cpu, add, chip);
			return;
		}
	}
881 882 883 884

	l2_cache = cpu_to_l2cache(cpu);
	mask = add ? cpu_online_mask : cpu_present_mask;
	for_each_cpu(i, mask) {
885
		np = cpu_to_l2cache(i);
886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901
		if (!np)
			continue;
		if (np == l2_cache) {
			if (add) {
				cpumask_set_cpu(cpu, cpu_core_mask(i));
				cpumask_set_cpu(i, cpu_core_mask(cpu));
			} else {
				cpumask_clear_cpu(cpu, cpu_core_mask(i));
				cpumask_clear_cpu(i, cpu_core_mask(cpu));
			}
		}
		of_node_put(np);
	}
	of_node_put(l2_cache);
}

L
Linus Torvalds 已提交
902
/* Activate a secondary processor. */
903
void start_secondary(void *unused)
L
Linus Torvalds 已提交
904 905
{
	unsigned int cpu = smp_processor_id();
906
	int i, base;
L
Linus Torvalds 已提交
907

V
Vegard Nossum 已提交
908
	mmgrab(&init_mm);
L
Linus Torvalds 已提交
909 910 911
	current->active_mm = &init_mm;

	smp_store_cpu_info(cpu);
P
Paul Mackerras 已提交
912
	set_dec(tb_ticks_per_jiffy);
A
Andrew Morton 已提交
913
	preempt_disable();
914
	cpu_callin_map[cpu] = 1;
L
Linus Torvalds 已提交
915

916 917
	if (smp_ops->setup_cpu)
		smp_ops->setup_cpu(cpu);
L
Linus Torvalds 已提交
918 919 920
	if (smp_ops->take_timebase)
		smp_ops->take_timebase();

921 922
	secondary_cpu_time_init();

923 924 925
#ifdef CONFIG_PPC64
	if (system_state == SYSTEM_RUNNING)
		vdso_data->processorCount++;
926 927

	vdso_getcpu_init();
928
#endif
929
	/* Update sibling maps */
930
	base = cpu_first_thread_sibling(cpu);
931
	for (i = 0; i < threads_per_core; i++) {
932
		if (cpu_is_offline(base + i) && (cpu != base + i))
933
			continue;
934 935
		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
936 937 938 939 940

		/* cpu_core_map should be a superset of
		 * cpu_sibling_map even if we don't have cache
		 * information, so update the former here, too.
		 */
941 942
		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
943
	}
944
	traverse_core_siblings(cpu, true);
L
Linus Torvalds 已提交
945

946 947 948
	set_numa_node(numa_cpu_lookup_table[cpu]);
	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));

949 950 951 952
	smp_wmb();
	notify_cpu_starting(cpu);
	set_cpu_online(cpu, true);

L
Linus Torvalds 已提交
953 954
	local_irq_enable();

955
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
956 957

	BUG();
L
Linus Torvalds 已提交
958 959 960 961 962 963 964
}

int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

965 966
#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymetric SMT dependancy */
G
Guenter Roeck 已提交
967
static int powerpc_smt_flags(void)
968
{
969
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986

	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
		flags |= SD_ASYM_PACKING;
	}
	return flags;
}
#endif

static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

L
Linus Torvalds 已提交
987 988
void __init smp_cpus_done(unsigned int max_cpus)
{
989
	cpumask_var_t old_mask;
L
Linus Torvalds 已提交
990 991 992 993 994

	/* We want the setup_cpu() here to be called from CPU 0, but our
	 * init thread may have been "borrowed" by another CPU in the meantime
	 * se we pin us down to CPU 0 for a short while
	 */
995
	alloc_cpumask_var(&old_mask, GFP_NOWAIT);
996
	cpumask_copy(old_mask, &current->cpus_allowed);
J
Julia Lawall 已提交
997
	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
L
Linus Torvalds 已提交
998
	
999
	if (smp_ops && smp_ops->setup_cpu)
1000
		smp_ops->setup_cpu(boot_cpuid);
L
Linus Torvalds 已提交
1001

1002 1003 1004
	set_cpus_allowed_ptr(current, old_mask);

	free_cpumask_var(old_mask);
1005

1006 1007 1008
	if (smp_ops && smp_ops->bringup_done)
		smp_ops->bringup_done();

1009
	dump_numa_cpu_topology();
1010

1011
	set_sched_topology(powerpc_topology);
L
Linus Torvalds 已提交
1012

1013 1014
}

L
Linus Torvalds 已提交
1015 1016 1017
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
1018 1019 1020
	int cpu = smp_processor_id();
	int base, i;
	int err;
L
Linus Torvalds 已提交
1021

1022 1023 1024 1025 1026 1027 1028 1029
	if (!smp_ops->cpu_disable)
		return -ENOSYS;

	err = smp_ops->cpu_disable();
	if (err)
		return err;

	/* Update sibling maps */
1030
	base = cpu_first_thread_sibling(cpu);
1031
	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
1032 1033 1034 1035
		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
1036
	}
1037
	traverse_core_siblings(cpu, false);
1038 1039

	return 0;
L
Linus Torvalds 已提交
1040 1041 1042 1043 1044 1045 1046
}

void __cpu_die(unsigned int cpu)
{
	if (smp_ops->cpu_die)
		smp_ops->cpu_die(cpu);
}
1047

1048 1049 1050 1051
void cpu_die(void)
{
	if (ppc_md.cpu_die)
		ppc_md.cpu_die();
1052 1053 1054

	/* If we return, we re-enter start_secondary */
	start_secondary_resume();
1055
}
1056

L
Linus Torvalds 已提交
1057
#endif