smp.c
/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/profile.h>

#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/kexec.h>
#include <asm/asm-prototypes.h>
#include <asm/cpu_has_feature.h>

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef CONFIG_HOTPLUG_CPU
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

struct thread_info *secondary_ti;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* SMP operations for this machine */
struct smp_ops_t *smp_ops;

/* Can't be static due to PowerMac hackery */
volatile unsigned int cpu_callin_map[NR_CPUS];

int smt_enabled_at_boot = 1;

/*
 * Returns 1 if the specified cpu should be brought up during boot.
 * Used to inhibit booting threads if they've been disabled or
 * limited on the command line
 */
int smp_generic_cpu_bootable(unsigned int nr)
{
	/* Special case - we inhibit secondary thread startup
	 * during boot if the user requests it.
	 */
	if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
			return 0;
		if (smt_enabled_at_boot
		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
			return 0;
	}

	return 1;
}
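/*
 * Example (illustrative): on a core with eight hardware threads, booting
 * with SMT limited to two threads gives smt_enabled_at_boot == 2, so
 * threads 0 and 1 of each core pass the checks above while threads 2-7
 * return 0 here and stay offline until brought up later.
 */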


#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
	BUG_ON(nr < 0 || nr >= NR_CPUS);

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero. After we set cpu_start,
	 * the processor will continue on to secondary_start
	 */
	if (!paca[nr].cpu_start) {
		paca[nr].cpu_start = 1;
		smp_mb();
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Ok it's not there, so it might be soft-unplugged, let's
	 * try to bring it back
	 */
	generic_set_cpu_up(nr);
	smp_wmb();
	smp_send_reschedule(nr);
#endif /* CONFIG_HOTPLUG_CPU */

	return 0;
}
#endif /* CONFIG_PPC64 */

static irqreturn_t call_function_action(int irq, void *data)
{
	generic_smp_call_function_interrupt();
	return IRQ_HANDLED;
}

static irqreturn_t reschedule_action(int irq, void *data)
{
	scheduler_ipi();
	return IRQ_HANDLED;
}

static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
	tick_broadcast_ipi_handler();
	return IRQ_HANDLED;
}

#ifdef CONFIG_NMI_IPI
static irqreturn_t nmi_ipi_action(int irq, void *data)
{
	smp_handle_nmi_ipi(get_irq_regs());
	return IRQ_HANDLED;
}
#endif

static irq_handler_t smp_ipi_action[] = {
	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
	[PPC_MSG_RESCHEDULE] = reschedule_action,
	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
#endif
};

/*
 * The NMI IPI is a fallback and not truly non-maskable. It is simpler
 * than going through the call function infrastructure, and strongly
 * serialized, so it is more appropriate for debugging.
 */
const char *smp_ipi_name[] = {
	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
	[PPC_MSG_NMI_IPI] = "nmi ipi",
};

/* optional function to request ipi, for controllers with >= 4 ipis */
int smp_request_message_ipi(int virq, int msg)
{
	int err;

	if (msg < 0 || msg > PPC_MSG_NMI_IPI)
		return -EINVAL;
#ifndef CONFIG_NMI_IPI
	if (msg == PPC_MSG_NMI_IPI)
		return 1;
#endif

	err = request_irq(virq, smp_ipi_action[msg],
			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
			  smp_ipi_name[msg], NULL);
	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
		virq, smp_ipi_name[msg], err);

	return err;
}

#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
	long messages;			/* current messages */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);

void smp_muxed_ipi_set_message(int cpu, int msg)
{
	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
	char *message = (char *)&info->messages;

	/*
	 * Order previous accesses before accesses in the IPI handler.
	 */
	smp_mb();
	message[msg] = 1;
}

void smp_muxed_ipi_message_pass(int cpu, int msg)
{
	smp_muxed_ipi_set_message(cpu, msg);

	/*
	 * cause_ipi functions are required to include a full barrier
	 * before doing whatever causes the IPI.
	 */
	smp_ops->cause_ipi(cpu);
}

#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
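/*
 * Explanatory note: each message type owns one byte of the per-cpu
 * "messages" long.  smp_muxed_ipi_set_message() stores 1 into byte @msg,
 * and smp_ipi_demux_relaxed() below drains every pending message at once
 * with xchg().  IPI_MESSAGE(msg) builds the matching bit mask: message 2,
 * for example, lives in byte 2, which is bit 16 on little-endian and bit
 * (BITS_PER_LONG - 24) on big-endian.
 */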

irqreturn_t smp_ipi_demux(void)
{
	mb();	/* order any irq clear */

	return smp_ipi_demux_relaxed();
}

/* sync-free variant. Callers should ensure synchronization */
irqreturn_t smp_ipi_demux_relaxed(void)
{
	struct cpu_messages *info;
	unsigned long all;

	info = this_cpu_ptr(&ipi_message);
	do {
		all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
		/*
		 * Must check for PPC_MSG_RM_HOST_ACTION messages
		 * before PPC_MSG_CALL_FUNCTION messages because when
		 * a VM is destroyed, we call kick_all_cpus_sync()
		 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
		 * messages have completed before we free any VCPUs.
		 */
		if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
			kvmppc_xics_ipi_action();
#endif
		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
			generic_smp_call_function_interrupt();
		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
			scheduler_ipi();
		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
			tick_broadcast_ipi_handler();
#ifdef CONFIG_NMI_IPI
		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
			nmi_ipi_action(0, NULL);
#endif
	} while (info->messages);

	return IRQ_HANDLED;
}
#endif /* CONFIG_PPC_SMP_MUXED_IPI */

static inline void do_message_pass(int cpu, int msg)
{
	if (smp_ops->message_pass)
		smp_ops->message_pass(cpu, msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	else
		smp_muxed_ipi_message_pass(cpu, msg);
#endif
}

void smp_send_reschedule(int cpu)
{
	if (likely(smp_ops))
		do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(smp_send_reschedule);

void arch_send_call_function_single_ipi(int cpu)
{
	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

#ifdef CONFIG_NMI_IPI

/*
 * "NMI IPI" system.
 *
 * NMI IPIs may not be recoverable, so should not be used as ongoing part of
 * a running system. They can be used for crash, debug, halt/reboot, etc.
 *
 * NMI IPIs are globally single threaded. No more than one in progress at
 * any time.
 *
 * The IPI call waits with interrupts disabled until all targets enter the
 * NMI handler, then the call returns.
 *
 * No new NMI can be initiated until targets exit the handler.
 *
 * The IPI call may time out without all targets entering the NMI handler.
 * In that case, there is some logic to recover (and ignore subsequent
 * NMI interrupts that may eventually be raised), but the platform interrupt
 * handler may not be able to distinguish this from other exception causes,
 * which may cause a crash.
 */

static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
static int nmi_ipi_busy_count = 0;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;

static void nmi_ipi_lock_start(unsigned long *flags)
{
	raw_local_irq_save(*flags);
	hard_irq_disable();
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
		raw_local_irq_restore(*flags);
		cpu_relax();
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}

static void nmi_ipi_lock(void)
{
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
		cpu_relax();
}

static void nmi_ipi_unlock(void)
{
	smp_mb();
	WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
	atomic_set(&__nmi_ipi_lock, 0);
}

static void nmi_ipi_unlock_end(unsigned long *flags)
{
	nmi_ipi_unlock();
	raw_local_irq_restore(*flags);
}
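/*
 * Locking notes: __nmi_ipi_lock is a bare atomic flag taken with a
 * cmpxchg() spin loop rather than a spinlock, since it is also taken
 * from the NMI handler itself.  nmi_ipi_lock_start() hard disables
 * interrupts before trying for the lock, but restores them between
 * attempts rather than spinning with interrupts hard-disabled while
 * another initiator holds the lock.
 */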

/*
 * Platform NMI handler calls this to ack
 */
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
	void (*fn)(struct pt_regs *);
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 0;

	/*
	 * Unexpected NMIs are possible here because the interrupt may not
	 * be able to distinguish NMI IPIs from other types of NMIs, or
	 * because the caller may have timed out.
	 */
	nmi_ipi_lock_start(&flags);
	if (!nmi_ipi_busy_count)
		goto out;
	if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
		goto out;

	fn = nmi_ipi_function;
	if (!fn)
		goto out;

	cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	nmi_ipi_busy_count++;
	nmi_ipi_unlock();

	ret = 1;

	fn(regs);

	nmi_ipi_lock();
	nmi_ipi_busy_count--;
out:
	nmi_ipi_unlock_end(&flags);

	return ret;
}

static void do_smp_send_nmi_ipi(int cpu)
{
	if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
		return;

	if (cpu >= 0) {
		do_message_pass(cpu, PPC_MSG_NMI_IPI);
	} else {
		int c;

		for_each_online_cpu(c) {
			if (c == raw_smp_processor_id())
				continue;
			do_message_pass(c, PPC_MSG_NMI_IPI);
		}
	}
}

/*
 * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
 * - fn is the target callback function.
 * - delay_us > 0 is the delay before giving up waiting for targets to
 *   enter the handler, == 0 specifies indefinite delay.
 */
static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 1;

	BUG_ON(cpu == me);
	BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);

	if (unlikely(!smp_ops))
		return 0;

	/* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
	nmi_ipi_lock_start(&flags);
	while (nmi_ipi_busy_count) {
		nmi_ipi_unlock_end(&flags);
		cpu_relax();
		nmi_ipi_lock_start(&flags);
	}

	nmi_ipi_function = fn;

	if (cpu < 0) {
		/* ALL_OTHERS */
		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	} else {
		/* cpumask starts clear */
		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
	}
	nmi_ipi_busy_count++;
	nmi_ipi_unlock();

	do_smp_send_nmi_ipi(cpu);

	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
		udelay(1);
		if (delay_us) {
			delay_us--;
			if (!delay_us)
				break;
		}
	}

	nmi_ipi_lock();
	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
		/* Could not gather all CPUs */
		ret = 0;
		cpumask_clear(&nmi_ipi_pending_mask);
	}
	nmi_ipi_busy_count--;
	nmi_ipi_unlock_end(&flags);

	return ret;
}
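/*
 * Note: delay_us is consumed in 1us steps (one udelay(1) per loop
 * iteration above), so the 1000000 passed by smp_send_debugger_break()
 * and crash_send_ipi() below is roughly a one second wait for all
 * targets to enter the handler before giving up.
 */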
#endif /* CONFIG_NMI_IPI */

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
}
#endif

#ifdef CONFIG_DEBUGGER
void debugger_ipi_callback(struct pt_regs *regs)
{
	debugger_ipi(regs);
}

void smp_send_debugger_break(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif

#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
}
#endif

static void stop_this_cpu(void *dummy)
{
	/* Remove this CPU */
	set_cpu_online(smp_processor_id(), false);

	local_irq_disable();
	while (1)
		;
}

void smp_send_stop(void)
{
	smp_call_function(stop_this_cpu, NULL, 0);
}

struct thread_info *current_set[NR_CPUS];

static void smp_store_cpu_info(int id)
{
	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
#ifdef CONFIG_PPC_FSL_BOOK3E
	per_cpu(next_tlbcam_idx, id)
		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;

	DBG("smp_prepare_cpus\n");

	/*
	 * setup_cpu may need to be called on the boot cpu. We haven't
	 * spun any cpus up but let's be paranoid.
	 */
	BUG_ON(boot_cpuid != smp_processor_id());

	/* Fixup boot cpu */
	smp_store_cpu_info(boot_cpuid);
	cpu_callin_map[boot_cpuid] = 1;

	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		/*
		 * numa_node_id() works after this.
		 */
		if (cpu_present(cpu)) {
			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
			set_cpu_numa_mem(cpu,
				local_memory_node(numa_cpu_lookup_table[cpu]));
		}
	}

	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

	if (smp_ops && smp_ops->probe)
		smp_ops->probe();
}

void smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
	paca[boot_cpuid].__current = current;
#endif
	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
	current_set[boot_cpuid] = task_thread_info(current);
}

#ifdef CONFIG_HOTPLUG_CPU

int generic_cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();

	if (cpu == boot_cpuid)
		return -EBUSY;

	set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64
	vdso_data->processorCount--;
#endif
	/* Update affinity of all IRQs previously aimed at this CPU */
	irq_migrate_all_off_this_cpu();

	/* Give the CPU time to drain in-flight ones */
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	return 0;
}

void generic_cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (is_cpu_dead(cpu))
			return;
		msleep(100);
	}
	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
}

void generic_set_cpu_dead(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_DEAD;
}

/*
 * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
 * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
 * which makes the delay in generic_cpu_die() not happen.
 */
void generic_set_cpu_up(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
}

int generic_check_cpu_restart(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}

int is_cpu_dead(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_DEAD;
}

static bool secondaries_inhibited(void)
{
	return kvm_hv_mode_active();
}

#else /* HOTPLUG_CPU */

#define secondaries_inhibited()		0

#endif

static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
	struct thread_info *ti = task_thread_info(idle);

#ifdef CONFIG_PPC64
	paca[cpu].__current = idle;
	paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
	ti->cpu = cpu;
	secondary_ti = current_set[cpu] = ti;
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int rc, c;

	/*
	 * Don't allow secondary threads to come online if inhibited
	 */
	if (threads_per_core > 1 && secondaries_inhibited() &&
	    cpu_thread_in_subcore(cpu))
		return -EBUSY;

	if (smp_ops == NULL ||
	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
		return -EINVAL;

	cpu_idle_thread_init(cpu, tidle);

	/*
	 * The platform might need to allocate resources prior to bringing
	 * up the CPU
	 */
	if (smp_ops->prepare_cpu) {
		rc = smp_ops->prepare_cpu(cpu);
		if (rc)
			return rc;
	}

	/* Make sure callin-map entry is 0 (can be left over from a
	 * CPU hotplug)
	 */
	cpu_callin_map[cpu] = 0;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	smp_mb();

	/* wake up cpus */
	DBG("smp: kicking cpu %d\n", cpu);
	rc = smp_ops->kick_cpu(cpu);
	if (rc) {
		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
		return rc;
	}

	/*
	 * wait to see if the cpu made a callin (is actually up).
	 * use this value that I found through experimentation.
	 * -- Cort
	 */
	if (system_state < SYSTEM_RUNNING)
		for (c = 50000; c && !cpu_callin_map[cpu]; c--)
			udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
	else
		/*
		 * CPUs can take much longer to come up in the
		 * hotplug case.  Wait five seconds.
		 */
		for (c = 5000; c && !cpu_callin_map[cpu]; c--)
			msleep(1);
#endif

	if (!cpu_callin_map[cpu]) {
		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
		return -ENOENT;
	}

	DBG("Processor %u found.\n", cpu);

	if (smp_ops->give_timebase)
		smp_ops->give_timebase();

	/* Wait until cpu puts itself in the online & active maps */
	while (!cpu_online(cpu))
		cpu_relax();

	return 0;
}

/* Return the value of the reg property corresponding to the given
 * logical cpu.
 */
int cpu_to_core_id(int cpu)
{
	struct device_node *np;
	const __be32 *reg;
	int id = -1;

	np = of_get_cpu_node(cpu, NULL);
	if (!np)
		goto out;

	reg = of_get_property(np, "reg", NULL);
	if (!reg)
		goto out;

	id = be32_to_cpup(reg);
out:
	of_node_put(np);
	return id;
}
EXPORT_SYMBOL_GPL(cpu_to_core_id);

/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
	return cpu >> threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);

int cpu_first_thread_of_core(int core)
{
	return core << threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
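/*
 * Worked example (illustrative): with eight threads per core,
 * threads_shift is 3, so cpu_core_index_of_thread(13) == 1 and
 * cpu_first_thread_of_core(2) == 16.
 */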

static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
{
	const struct cpumask *mask;
	struct device_node *np;
	int i, plen;
	const __be32 *prop;

	mask = add ? cpu_online_mask : cpu_present_mask;
	for_each_cpu(i, mask) {
		np = of_get_cpu_node(i, NULL);
		if (!np)
			continue;
		prop = of_get_property(np, "ibm,chip-id", &plen);
		if (prop && plen == sizeof(int) &&
		    of_read_number(prop, 1) == chipid) {
			if (add) {
				cpumask_set_cpu(cpu, cpu_core_mask(i));
				cpumask_set_cpu(i, cpu_core_mask(cpu));
			} else {
				cpumask_clear_cpu(cpu, cpu_core_mask(i));
				cpumask_clear_cpu(i, cpu_core_mask(cpu));
			}
		}
		of_node_put(np);
	}
}

/* Must be called when no change can occur to cpu_present_mask,
 * i.e. during cpu online or offline.
 */
static struct device_node *cpu_to_l2cache(int cpu)
{
	struct device_node *np;
	struct device_node *cache;

	if (!cpu_present(cpu))
		return NULL;

	np = of_get_cpu_node(cpu, NULL);
	if (np == NULL)
		return NULL;

	cache = of_find_next_cache_node(np);

	of_node_put(np);

	return cache;
}

static void traverse_core_siblings(int cpu, bool add)
{
	struct device_node *l2_cache, *np;
	const struct cpumask *mask;
	int i, chip, plen;
	const __be32 *prop;

	/* First see if we have ibm,chip-id properties in cpu nodes */
	np = of_get_cpu_node(cpu, NULL);
	if (np) {
		chip = -1;
		prop = of_get_property(np, "ibm,chip-id", &plen);
		if (prop && plen == sizeof(int))
			chip = of_read_number(prop, 1);
		of_node_put(np);
		if (chip >= 0) {
			traverse_siblings_chip_id(cpu, add, chip);
			return;
		}
	}

	l2_cache = cpu_to_l2cache(cpu);
	mask = add ? cpu_online_mask : cpu_present_mask;
	for_each_cpu(i, mask) {
		np = cpu_to_l2cache(i);
		if (!np)
			continue;
		if (np == l2_cache) {
			if (add) {
				cpumask_set_cpu(cpu, cpu_core_mask(i));
				cpumask_set_cpu(i, cpu_core_mask(cpu));
			} else {
				cpumask_clear_cpu(cpu, cpu_core_mask(i));
				cpumask_clear_cpu(i, cpu_core_mask(cpu));
			}
		}
		of_node_put(np);
	}
	of_node_put(l2_cache);
}
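/*
 * Summary: traverse_core_siblings() links @cpu with every other online
 * (when adding) or present (when removing) cpu that shares the same
 * ibm,chip-id property or, failing that, the same L2 cache node, by
 * updating both cpus' cpu_core_mask.
 */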

/* Activate a secondary processor. */
void start_secondary(void *unused)
{
	unsigned int cpu = smp_processor_id();
	int i, base;

	mmgrab(&init_mm);
	current->active_mm = &init_mm;

	smp_store_cpu_info(cpu);
	set_dec(tb_ticks_per_jiffy);
	preempt_disable();
	cpu_callin_map[cpu] = 1;

	if (smp_ops->setup_cpu)
		smp_ops->setup_cpu(cpu);
	if (smp_ops->take_timebase)
		smp_ops->take_timebase();

	secondary_cpu_time_init();

#ifdef CONFIG_PPC64
	if (system_state == SYSTEM_RUNNING)
		vdso_data->processorCount++;

	vdso_getcpu_init();
#endif
	/* Update sibling maps */
	base = cpu_first_thread_sibling(cpu);
	for (i = 0; i < threads_per_core; i++) {
		if (cpu_is_offline(base + i) && (cpu != base + i))
			continue;
		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));

		/* cpu_core_map should be a superset of
		 * cpu_sibling_map even if we don't have cache
		 * information, so update the former here, too.
		 */
		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
	}
	traverse_core_siblings(cpu, true);

	set_numa_node(numa_cpu_lookup_table[cpu]);
	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));

	smp_wmb();
	notify_cpu_starting(cpu);
	set_cpu_online(cpu, true);

	local_irq_enable();

	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

	BUG();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;

	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
		flags |= SD_ASYM_PACKING;
	}
	return flags;
}
#endif

static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};
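/*
 * Note: this defines a two-level scheduler topology.  The SMT level
 * covers the hardware threads of a core (cpu_smt_mask); on CPUs with
 * CPU_FTR_ASYM_SMT it also requests SD_ASYM_PACKING so that load is
 * packed onto lower-numbered threads.  The DIE level covers all CPUs
 * returned by cpu_cpu_mask().
 */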

void __init smp_cpus_done(unsigned int max_cpus)
{
	cpumask_var_t old_mask;

	/* We want the setup_cpu() here to be called from CPU 0, but our
	 * init thread may have been "borrowed" by another CPU in the meantime
	 * so we pin ourselves to CPU 0 for a short while
	 */
	alloc_cpumask_var(&old_mask, GFP_NOWAIT);
	cpumask_copy(old_mask, &current->cpus_allowed);
	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
	
	if (smp_ops && smp_ops->setup_cpu)
		smp_ops->setup_cpu(boot_cpuid);

	set_cpus_allowed_ptr(current, old_mask);

	free_cpumask_var(old_mask);

	if (smp_ops && smp_ops->bringup_done)
		smp_ops->bringup_done();

	dump_numa_cpu_topology();

	set_sched_topology(powerpc_topology);

}

#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	int base, i;
	int err;

	if (!smp_ops->cpu_disable)
		return -ENOSYS;

	err = smp_ops->cpu_disable();
	if (err)
		return err;

	/* Update sibling maps */
	base = cpu_first_thread_sibling(cpu);
	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
	}
	traverse_core_siblings(cpu, false);

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	if (smp_ops->cpu_die)
		smp_ops->cpu_die(cpu);
}

void cpu_die(void)
{
	if (ppc_md.cpu_die)
		ppc_md.cpu_die();

	/* If we return, we re-enter start_secondary */
	start_secondary_resume();
}

#endif