/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/irq_work.h>
#include <linux/tick.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>

#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "pmu.h"

cpumask_var_t xen_cpu_initialized_map;

struct xen_common_irq {
	int irq;
	char *name;
};
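
/*
 * One event-channel irq of each class is bound per CPU; .irq == -1
 * means "not currently bound" (see xen_smp_intr_free()).
 */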
static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);

/*
 * Reschedule callback.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
	inc_irq_stat(irq_resched_count);
	scheduler_ipi();

	return IRQ_HANDLED;
}

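/*
 * Runs on the freshly booted secondary VCPU itself: finish per-cpu
 * setup and mark the CPU online before interrupts are enabled.
 */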
static void cpu_bringup(void)
{
	int cpu;

	cpu_init();
	touch_softlockup_watchdog();
	preempt_disable();

	/* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
	if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
		xen_enable_sysenter();
		xen_enable_syscall();
	}
	cpu = smp_processor_id();
	smp_store_cpu_info(cpu);
	cpu_data(cpu).x86_max_cores = 1;
	set_cpu_sibling_map(cpu);

	xen_setup_cpu_clockevents();

	notify_cpu_starting(cpu);

	set_cpu_online(cpu, true);

	cpu_set_state_online(cpu);  /* Implies full memory barrier. */

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();
}

/*
 * Note: cpu parameter is only relevant for PVH. The reason for passing
 * it is that we can't use smp_processor_id() until the per-cpu segments
 * are loaded, for which we need the cpu number! So we pass it in rdi as
 * the first parameter.
 */
asmlinkage __visible void cpu_bringup_and_idle(int cpu)
{
#ifdef CONFIG_XEN_PVH
	if (xen_feature(XENFEAT_auto_translated_physmap) &&
	    xen_feature(XENFEAT_supervisor_mode_kernel))
		xen_pvh_secondary_vcpu_init(cpu);
#endif
	cpu_bringup();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

static void xen_smp_intr_free(unsigned int cpu)
{
	if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
		unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
		per_cpu(xen_resched_irq, cpu).irq = -1;
		kfree(per_cpu(xen_resched_irq, cpu).name);
		per_cpu(xen_resched_irq, cpu).name = NULL;
	}
	if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
		unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
		per_cpu(xen_callfunc_irq, cpu).irq = -1;
		kfree(per_cpu(xen_callfunc_irq, cpu).name);
		per_cpu(xen_callfunc_irq, cpu).name = NULL;
	}
	if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
		unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
		per_cpu(xen_debug_irq, cpu).irq = -1;
		kfree(per_cpu(xen_debug_irq, cpu).name);
		per_cpu(xen_debug_irq, cpu).name = NULL;
	}
	if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
		unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
				       NULL);
		per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
		kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
		per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
	}
	if (xen_hvm_domain())
		return;

	if (per_cpu(xen_irq_work, cpu).irq >= 0) {
		unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
		per_cpu(xen_irq_work, cpu).irq = -1;
		kfree(per_cpu(xen_irq_work, cpu).name);
		per_cpu(xen_irq_work, cpu).name = NULL;
	}

	if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
		unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
		per_cpu(xen_pmu_irq, cpu).irq = -1;
		kfree(per_cpu(xen_pmu_irq, cpu).name);
		per_cpu(xen_pmu_irq, cpu).name = NULL;
	}
}
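
/*
 * Bind the IPI and VIRQ sources for @cpu.  Under Xen, IPIs are event
 * channels (see the file comment above), so each one is bound to an
 * irq with an ordinary handler.
 */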
static int xen_smp_intr_init(unsigned int cpu)
{
	int rc;
	char *resched_name, *callfunc_name, *debug_name, *pmu_name;

	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
				    cpu,
				    xen_reschedule_interrupt,
				    IRQF_PERCPU|IRQF_NOBALANCING,
				    resched_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(xen_resched_irq, cpu).irq = rc;
	per_cpu(xen_resched_irq, cpu).name = resched_name;

	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
				    cpu,
				    xen_call_function_interrupt,
				    IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(xen_callfunc_irq, cpu).irq = rc;
	per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;

	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
				     IRQF_PERCPU | IRQF_NOBALANCING,
				     debug_name, NULL);
	if (rc < 0)
		goto fail;
	per_cpu(xen_debug_irq, cpu).irq = rc;
	per_cpu(xen_debug_irq, cpu).name = debug_name;

	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
				    cpu,
				    xen_call_function_single_interrupt,
				    IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
	per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;

	/*
	 * The IRQ worker on PVHVM goes through the native path and uses the
	 * IPI mechanism.
	 */
	if (xen_hvm_domain())
		return 0;

	callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
				    cpu,
				    xen_irq_work_interrupt,
				    IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(xen_irq_work, cpu).irq = rc;
	per_cpu(xen_irq_work, cpu).name = callfunc_name;

	if (is_xen_pmu(cpu)) {
		pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
		rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
					     xen_pmu_irq_handler,
					     IRQF_PERCPU|IRQF_NOBALANCING,
					     pmu_name, NULL);
		if (rc < 0)
			goto fail;
		per_cpu(xen_pmu_irq, cpu).irq = rc;
		per_cpu(xen_pmu_irq, cpu).name = pmu_name;
	}

	return 0;

 fail:
	xen_smp_intr_free(cpu);
	return rc;
}

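/*
 * For a domU, size the possible map by asking the hypervisor which
 * VCPUs exist: VCPUOP_is_up returns >= 0 only for valid VCPU ids.
 */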
static void __init xen_fill_possible_map(void)
{
	int i, rc;

	if (xen_initial_domain())
		return;

	for (i = 0; i < nr_cpu_ids; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0) {
			num_processors++;
			set_cpu_possible(i, true);
		}
	}
}

static void __init xen_filter_cpu_maps(void)
{
	int i, rc;
	unsigned int subtract = 0;

	if (!xen_initial_domain())
		return;

	num_processors = 0;
	disabled_cpus = 0;
	for (i = 0; i < nr_cpu_ids; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0) {
			num_processors++;
			set_cpu_possible(i, true);
		} else {
			set_cpu_possible(i, false);
			set_cpu_present(i, false);
			subtract++;
		}
	}
#ifdef CONFIG_HOTPLUG_CPU
	/* This is akin to using 'nr_cpus' on the Linux command line.
	 * That is OK: with 'dom0_max_vcpus=X' we can have at most X
	 * VCPUs, while nr_cpu_ids may be greater than X. This is
	 * normally not a problem, except when CPU hotplugging is
	 * involved: then there may be more than X CPUs in the guest,
	 * which cannot work because there is no hypercall to expand
	 * the maximum number of VCPUs an already-running guest has.
	 * So cap nr_cpu_ids at X. */
	if (subtract)
		nr_cpu_ids = nr_cpu_ids - subtract;
#endif

}

static void __init xen_smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != 0);
	native_smp_prepare_boot_cpu();

	if (xen_pv_domain()) {
		if (!xen_feature(XENFEAT_writable_page_tables))
			/* We've switched to the "real" per-cpu gdt, so make
			 * sure the old memory can be recycled. */
			make_lowmem_page_readwrite(xen_initial_gdt);

#ifdef CONFIG_X86_32
		/*
		 * Xen starts us with XEN_FLAT_RING1_DS, but linux code
		 * expects __USER_DS
		 */
		loadsegment(ds, __USER_DS);
		loadsegment(es, __USER_DS);
#endif

		xen_filter_cpu_maps();
		xen_setup_vcpu_info_placement();
	}
	/*
	 * The alternative logic (which patches the unlock/lock) runs before
	 * the SMP bootup code is activated. Hence we need to set this up
	 * while the core kernel is being patched; otherwise we would end up
	 * with only modules patched but not the core code.
	 */
	xen_init_spinlocks();
}

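/*
 * PV counterpart of native_smp_prepare_cpus(): set up the boot CPU's
 * locks, IPIs and PMU, then clamp the possible map to max_cpus.
 */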
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned cpu;
	unsigned int i;

	if (skip_ioapic_setup) {
		char *m = (max_cpus == 0) ?
			"The nosmp parameter is incompatible with Xen; " \
			"use Xen dom0_max_vcpus=1 parameter" :
			"The noapic parameter is incompatible with Xen";

		xen_raw_printk(m);
		panic(m);
	}
	xen_init_lock_cpu(0);

	smp_store_boot_cpu_info();
	cpu_data(0).x86_max_cores = 1;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
	}
	set_cpu_sibling_map(0);

	xen_pmu_init(0);

	if (xen_smp_intr_init(0))
		BUG();

	if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
		panic("could not allocate xen_cpu_initialized_map\n");

	cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

	/* Restrict the possible_map according to max_cpus. */
	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
		for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
			continue;
		set_cpu_possible(cpu, false);
	}

	for_each_possible_cpu(cpu)
		set_cpu_present(cpu, true);
}

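/*
 * Build the initial vcpu_guest_context (registers, GDT frame, callback
 * entry points) for a secondary VCPU and register it with the
 * hypervisor via VCPUOP_initialise.
 */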
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	/* used to tell cpu_init() that it can proceed with initialization */
	cpumask_set_cpu(cpu, cpu_callout_mask);
	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_table(cpu);

#ifdef CONFIG_X86_32
	/* Note: PVH is not yet supported on x86_32. */
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
		ctxt->flags = VGCF_IN_KERNEL;
		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
		ctxt->user_regs.ds = __USER_DS;
		ctxt->user_regs.es = __USER_DS;
		ctxt->user_regs.ss = __KERNEL_DS;

		xen_copy_trap_info(ctxt->trap_ctxt);

		ctxt->ldt_ents = 0;

		BUG_ON((unsigned long)gdt & ~PAGE_MASK);

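		/*
		 * Xen requires the frames backing the GDT to be read-only
		 * before they can be installed for a new VCPU.
		 */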
		gdt_mfn = arbitrary_virt_to_mfn(gdt);
		make_lowmem_page_readonly(gdt);
		make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

		ctxt->gdt_frames[0] = gdt_mfn;
		ctxt->gdt_ents      = GDT_ENTRIES;

		ctxt->kernel_ss = __KERNEL_DS;
		ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
		ctxt->event_callback_cs     = __KERNEL_CS;
		ctxt->failsafe_callback_cs  = __KERNEL_CS;
#else
		ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
		ctxt->event_callback_eip    =
					(unsigned long)xen_hypervisor_callback;
		ctxt->failsafe_callback_eip =
					(unsigned long)xen_failsafe_callback;
		ctxt->user_regs.cs = __KERNEL_CS;
		per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	}
#ifdef CONFIG_XEN_PVH
	else {
		/*
		 * The vcpu comes up on kernel page tables which have the NX
		 * pte bit set. This means before DS/SS is touched, NX in
		 * EFER must be set. Hence the following assembly glue code.
		 */
		ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
		ctxt->user_regs.rdi = cpu;
		ctxt->user_regs.rsi = true;  /* entry == true */
	}
#endif
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}

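/*
 * Boot a secondary CPU: set up its timer, lock and IPI plumbing, load
 * its initial context, kick it with VCPUOP_up and wait until it has
 * reported itself online.
 */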
static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int rc;

	common_cpu_up(cpu, idle);

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_init_lock_cpu(cpu);

	/*
	 * PV VCPUs are always successfully taken down (see 'while' loop
	 * in xen_cpu_die()), so -EBUSY is an error.
	 */
	rc = cpu_check_up_prepare(cpu);
	if (rc)
		return rc;

	/* make sure interrupts start blocked */
	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	xen_pmu_init(cpu);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		return rc;

	rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
	BUG_ON(rc);

	while (cpu_report_state(cpu) != CPU_ONLINE)
		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);

	return 0;
}

static void xen_smp_cpus_done(unsigned int max_cpus)
{
}

#ifdef CONFIG_HOTPLUG_CPU
static int xen_cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();
	if (cpu == 0)
		return -EBUSY;

	cpu_disable_common();

	load_cr3(swapper_pg_dir);
	return 0;
}

static void xen_cpu_die(unsigned int cpu)
{
	while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
						     xen_vcpu_nr(cpu), NULL)) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ/10);
	}

	if (common_cpu_die(cpu) == 0) {
		xen_smp_intr_free(cpu);
		xen_uninit_lock_cpu(cpu);
		xen_teardown_timer(cpu);
		xen_pmu_finish(cpu);
	}
}

static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
{
	play_dead_common();
	HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
	cpu_bringup();
	/*
	 * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
	 * clears certain data that the cpu_idle loop (which called us
	 * and that we return from) expects. The only way to get that
	 * data back is to call:
	 */
	tick_nohz_idle_enter();

	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

#else /* !CONFIG_HOTPLUG_CPU */
static int xen_cpu_disable(void)
{
	return -ENOSYS;
}

static void xen_cpu_die(unsigned int cpu)
{
	BUG();
}

static void xen_play_dead(void)
{
	BUG();
}

#endif
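
/* Park the calling CPU: mark it offline and ask Xen to down its VCPU. */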
static void stop_self(void *v)
{
	int cpu = smp_processor_id();

	/* make sure we're not pinning something down */
	load_cr3(swapper_pg_dir);
	/* should set up a minimal gdt */

	set_cpu_online(cpu, false);

	HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
	BUG();
}

static void xen_stop_other_cpus(int wait)
{
	smp_call_function(stop_self, NULL, wait);
}

static void xen_smp_send_reschedule(int cpu)
{
	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

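/* Deliver @vector to every online CPU in @mask via its event channel. */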
static void __xen_send_IPI_mask(const struct cpumask *mask,
			      int vector)
{
	unsigned cpu;

	for_each_cpu_and(cpu, mask, cpu_online_mask)
		xen_send_IPI_one(cpu, vector);
}

static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
	int cpu;

	__xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

	/* Make sure other vcpus get a chance to run if they need to. */
	for_each_cpu(cpu, mask) {
		if (xen_vcpu_stolen(cpu)) {
			HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
			break;
		}
	}
}

static void xen_smp_send_call_function_single_ipi(int cpu)
{
	__xen_send_IPI_mask(cpumask_of(cpu),
			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
}

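/* Translate a native IPI vector number into its Xen equivalent. */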
static inline int xen_map_vector(int vector)
{
	int xen_vector;

	switch (vector) {
	case RESCHEDULE_VECTOR:
		xen_vector = XEN_RESCHEDULE_VECTOR;
		break;
	case CALL_FUNCTION_VECTOR:
		xen_vector = XEN_CALL_FUNCTION_VECTOR;
		break;
	case CALL_FUNCTION_SINGLE_VECTOR:
		xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR;
		break;
	case IRQ_WORK_VECTOR:
		xen_vector = XEN_IRQ_WORK_VECTOR;
		break;
#ifdef CONFIG_X86_64
	case NMI_VECTOR:
	case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
		xen_vector = XEN_NMI_VECTOR;
		break;
#endif
	default:
		xen_vector = -1;
		printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
			vector);
	}

	return xen_vector;
}

void xen_send_IPI_mask(const struct cpumask *mask,
			      int vector)
{
	int xen_vector = xen_map_vector(vector);

	if (xen_vector >= 0)
		__xen_send_IPI_mask(mask, xen_vector);
}

void xen_send_IPI_all(int vector)
{
	int xen_vector = xen_map_vector(vector);

	if (xen_vector >= 0)
		__xen_send_IPI_mask(cpu_online_mask, xen_vector);
}

void xen_send_IPI_self(int vector)
{
	int xen_vector = xen_map_vector(vector);

	if (xen_vector >= 0)
		xen_send_IPI_one(smp_processor_id(), xen_vector);
}

void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
				int vector)
{
	unsigned cpu;
	unsigned int this_cpu = smp_processor_id();
	int xen_vector = xen_map_vector(vector);

	if (!(num_online_cpus() > 1) || (xen_vector < 0))
		return;

	for_each_cpu_and(cpu, mask, cpu_online_mask) {
		if (this_cpu == cpu)
			continue;

		xen_send_IPI_one(cpu, xen_vector);
	}
}

void xen_send_IPI_allbutself(int vector)
{
	xen_send_IPI_mask_allbutself(cpu_online_mask, vector);
}

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
	irq_enter();
	generic_smp_call_function_interrupt();
	inc_irq_stat(irq_call_count);
	irq_exit();

	return IRQ_HANDLED;
}

static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
	irq_enter();
	generic_smp_call_function_single_interrupt();
	inc_irq_stat(irq_call_count);
	irq_exit();

	return IRQ_HANDLED;
}

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
	irq_enter();
	irq_work_run();
	inc_irq_stat(apic_irq_work_irqs);
	irq_exit();

	return IRQ_HANDLED;
}

static const struct smp_ops xen_smp_ops __initconst = {
	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
	.smp_prepare_cpus = xen_smp_prepare_cpus,
	.smp_cpus_done = xen_smp_cpus_done,

	.cpu_up = xen_cpu_up,
	.cpu_die = xen_cpu_die,
	.cpu_disable = xen_cpu_disable,
	.play_dead = xen_play_dead,

	.stop_other_cpus = xen_stop_other_cpus,
	.smp_send_reschedule = xen_smp_send_reschedule,

	.send_call_func_ipi = xen_smp_send_call_function_ipi,
	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

void __init xen_smp_init(void)
{
	smp_ops = xen_smp_ops;
	xen_fill_possible_map();
}

static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
	native_smp_prepare_cpus(max_cpus);
	WARN_ON(xen_smp_intr_init(0));

	xen_init_lock_cpu(0);
}

static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int rc;

	/*
	 * This can happen if the CPU was offlined earlier and
	 * offlining timed out in common_cpu_die().
	 */
	if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
		xen_smp_intr_free(cpu);
		xen_uninit_lock_cpu(cpu);
	}

	/*
	 * xen_smp_intr_init() needs to run before native_cpu_up()
	 * so that IPI vectors are set up on the booting CPU before
	 * it is marked online in native_cpu_up().
	 */
	rc = xen_smp_intr_init(cpu);
	WARN_ON(rc);
	if (!rc)
		rc = native_cpu_up(cpu, tidle);

	/*
	 * We must initialize the slowpath CPU kicker _after_ the native
	 * path has executed. If we initialized it before, none of the
	 * unlocker IPI kicks would reach the booting CPU, as the booting
	 * CPU had not yet set itself 'online' in cpu_online_mask. That
	 * mask is checked when IPIs are sent (on HVM at least).
	 */
	xen_init_lock_cpu(cpu);
	return rc;
}

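/*
 * On PVHVM the native SMP code does most of the work; only IPI delivery
 * and CPU bringup/teardown are overridden, and only when event-channel
 * vector callbacks are available.
 */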
void __init xen_hvm_smp_init(void)
{
	if (!xen_have_vector_callback)
		return;
	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
	smp_ops.cpu_up = xen_hvm_cpu_up;
	smp_ops.cpu_die = xen_cpu_die;
	smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
	smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
	smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu;
}