#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fixmap.h>

#define MAX_PATCH_LEN (255-1)
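/*
 * alt_instr::instrlen is a u8 in <asm/alternative.h>, so 255 is the hard
 * upper bound on a single patch site; MAX_PATCH_LEN bounds the on-stack
 * insnbuf[] used by apply_alternatives() and apply_paravirt() below
 * (the -1 presumably just keeps a byte of slack).
 */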

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int __initdata_or_module noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, ...)				\
do {							\
	if (debug_alternative)				\
		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
} while (0)

/*
 * Each GENERIC_NOPX is X bytes long and is defined as an array of bytes
 * encoding that nop. To get from one nop to the next, we add to the array
 * base an offset equal to the sum of the sizes of all the nops preceding
 * the one we are after.
 *
 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 * nice symmetry of sizes of the previous nops.
 */
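
/*
 * Concretely, with the layout below, intel_nops[3] points at GENERIC_NOP3,
 * i.e. intelnops + 1 + 2: the first byte past the one-byte and two-byte
 * nops. So ideal_nops[n] is always an n-byte nop for n <= ASM_NOP_MAX,
 * with index ASM_NOP_MAX+1 holding the 5-byte atomic nop.
 */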
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char intelnops[] =
{
	GENERIC_NOP1,
	GENERIC_NOP2,
	GENERIC_NOP3,
	GENERIC_NOP4,
	GENERIC_NOP5,
	GENERIC_NOP6,
	GENERIC_NOP7,
	GENERIC_NOP8,
	GENERIC_NOP5_ATOMIC
};
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef K8_NOP1
static const unsigned char k8nops[] =
{
	K8_NOP1,
	K8_NOP2,
	K8_NOP3,
	K8_NOP4,
	K8_NOP5,
	K8_NOP6,
	K8_NOP7,
	K8_NOP8,
	K8_NOP5_ATOMIC
};
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char k7nops[] =
{
	K7_NOP1,
	K7_NOP2,
	K7_NOP3,
	K7_NOP4,
	K7_NOP5,
	K7_NOP6,
	K7_NOP7,
	K7_NOP8,
	K7_NOP5_ATOMIC
};
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef P6_NOP1
static const unsigned char p6nops[] =
{
	P6_NOP1,
	P6_NOP2,
	P6_NOP3,
	P6_NOP4,
	P6_NOP5,
	P6_NOP6,
	P6_NOP7,
	P6_NOP8,
	P6_NOP5_ATOMIC
};
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
	NULL,
	p6nops,
	p6nops + 1,
	p6nops + 1 + 2,
	p6nops + 1 + 2 + 3,
	p6nops + 1 + 2 + 3 + 4,
	p6nops + 1 + 2 + 3 + 4 + 5,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

/* Initialize these to a safe default */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif

void __init arch_init_ideal_nops(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		/*
		 * Due to a decoder implementation quirk, some
		 * specific Intel CPUs actually perform better with
		 * the "k8_nops" than with the SDM-recommended NOPs.
		 */
		if (boot_cpu_data.x86 == 6 &&
		    boot_cpu_data.x86_model >= 0x0f &&
		    boot_cpu_data.x86_model != 0x1c &&
		    boot_cpu_data.x86_model != 0x26 &&
		    boot_cpu_data.x86_model != 0x27 &&
		    boot_cpu_data.x86_model < 0x30) {
			ideal_nops = k8_nops;
		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
			ideal_nops = p6_nops;
		} else {
#ifdef CONFIG_X86_64
			ideal_nops = k8_nops;
#else
			ideal_nops = intel_nops;
#endif
		}
		break;
	default:
#ifdef CONFIG_X86_64
		ideal_nops = k8_nops;
#else
		if (boot_cpu_has(X86_FEATURE_K8))
			ideal_nops = k8_nops;
		else if (boot_cpu_has(X86_FEATURE_K7))
			ideal_nops = k7_nops;
		else
			ideal_nops = intel_nops;
#endif
	}
}

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, ideal_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}
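
/*
 * Illustration, assuming ASM_NOP_MAX == 8 as on x86: padding an 11-byte
 * hole emits one 8-byte nop followed by one 3-byte nop rather than eleven
 * single-byte nops, which is much cheaper to decode at run time.
 */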

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

void __init_or_module apply_alternatives(struct alt_instr *start,
					 struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insnbuf[MAX_PATCH_LEN];

	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
	/*
	 * The scan order should be from start to end. A later scanned
	 * alternative can overwrite a previously scanned alternative.
	 * Some kernel functions (e.g. memcpy, memset, etc) use this
	 * order to patch code.
	 *
	 * So be careful if you want to change the scan order to any
	 * other order.
	 */
	for (a = start; a < end; a++) {
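		/*
		 * instr_offset and repl_offset are s32 offsets relative to
		 * their own field in struct alt_instr, which keeps the
		 * .altinstructions section position-independent; the two
		 * additions below recover the absolute addresses.
		 */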
		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->replacementlen > a->instrlen);
		BUG_ON(a->instrlen > sizeof(insnbuf));
		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
		if (!boot_cpu_has(a->cpuid))
			continue;

		memcpy(insnbuf, replacement, a->replacementlen);

		/* 0xe8 is a relative call; fix the offset. */
		if (*insnbuf == 0xe8 && a->replacementlen == 5)
			*(s32 *)(insnbuf + 1) += replacement - instr;

		add_nops(insnbuf + a->replacementlen,
			 a->instrlen - a->replacementlen);

		text_poke_early(instr, insnbuf, a->instrlen);
	}
}
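
/*
 * For reference, patch sites are emitted by the alternative() macro family
 * in <asm/alternative.h>. A minimal sketch of an era-typical call site
 * (rdtsc_barrier()):
 *
 *	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
 *
 * This places ASM_NOP3 in .text and records an alt_instr entry so that the
 * loop above rewrites the nop to "mfence" on CPUs with that feature bit.
 */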

#ifdef CONFIG_SMP

static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
	mutex_unlock(&text_mutex);
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
	mutex_unlock(&text_mutex);
}
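
/*
 * Byte-level example of the flip performed above: "lock; incl (%ecx)"
 * assembles to f0 ff 01. On UP the f0 (LOCK) byte is rewritten to 3e
 * (a DS segment override, architecturally a no-op here), giving 3e ff 01,
 * i.e. a plain "incl (%ecx)" of identical length.
 */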

struct smp_alt_module {
	/* module that owns these lock prefixes, NULL for the core kernel */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	const s32	*locks;
	const s32	*locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static bool uniproc_patched = false;	/* protected by smp_alt */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text,  void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&smp_alt);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		/* we'll run the (safe but slow) SMP code then ... */
		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__func__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&smp_alt);
}
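
/*
 * Note that the core kernel registers its own lock sites through this
 * function as well, with mod == NULL and name == "core kernel"; see
 * alternative_instructions() below.
 */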

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&smp_alt);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&smp_alt);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
	/*
	 * An older binutils section handling bug prevented
	 * alternatives replacement from working reliably.
	 *
	 * If this still occurs then you should see a hang
	 * or crash shortly after this line:
	 */
	pr_info("lockdep: fixing up alternatives\n");
#endif

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&smp_alt);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&smp_alt);
}

/* Return 1 if the address range is reserved for smp-alternatives */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
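
/*
 * This is intended for other patching code (e.g. the x86 kprobes code) so
 * it can refuse to place a probe on a byte that alternatives_enable_smp()
 * may later rewrite behind its back.
 */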
#endif

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insnbuf[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insnbuf, p->instr, p->len);
		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
					 (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, p->len - used);
		text_poke_early(p->instr, insnbuf, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
	/* The patching is not fully atomic, so try to avoid local
	   interruptions that might execute the code being patched.
	   Other CPUs are not running. */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other CPUs are not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
#endif

	apply_paravirt(__parainstructions, __parainstructions_end);

	restart_nmi();
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void *__init_or_module text_poke_early(void *addr, const void *opcode,
					      size_t len)
{
	unsigned long flags;
	local_irq_save(flags);
	memcpy(addr, opcode, len);
	sync_core();
	local_irq_restore(flags);
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * That means the patched region must be writable atomically and the address
 * must be aligned in a way that permits an atomic write. It also makes sure
 * we fit on a single page.
 *
 * Note: Must be called under text_mutex.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	char *vaddr;
	struct page *pages[2];
	int i;

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	BUG_ON(!pages[0]);
	local_irq_save(flags);
	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
	if (pages[1])
		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
	clear_fixmap(FIX_TEXT_POKE0);
	if (pages[1])
		clear_fixmap(FIX_TEXT_POKE1);
	local_flush_tlb();
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	for (i = 0; i < len; i++)
		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
	local_irq_restore(flags);
	return addr;
}
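
/*
 * Design note: kernel text may be mapped read-only, so text_poke() never
 * writes through the normal text mapping. It instead installs a temporary
 * writable alias via the FIX_TEXT_POKE0/1 fixmap slots (two slots because
 * the patched bytes may straddle a page boundary) and writes through that.
 */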

static void do_sync_core(void *info)
{
	sync_core();
}

static bool bp_patching_in_progress;
static void *bp_int3_handler, *bp_int3_addr;

int poke_int3_handler(struct pt_regs *regs)
{
	/* bp_patching_in_progress */
	smp_rmb();

	if (likely(!bp_patching_in_progress))
		return 0;

	if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
		return 0;

	/* set up the specified breakpoint handler */
	regs->ip = (unsigned long) bp_int3_handler;

	return 1;
}
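
/*
 * This check is meant to run very early in the int3 trap path, before
 * other int3 users such as kprobes get a chance to look at the trap, so
 * that patching stays safe even while those users are active.
 */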

/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr:	address to patch
 * @opcode:	opcode of new instruction
 * @len:	length to copy
 * @handler:	address to jump to when the temporary breakpoint is hit
 *
 * Modify a multi-byte instruction by using an int3 breakpoint on SMP.
 * We completely avoid stop_machine() here, and achieve the
 * synchronization using an int3 breakpoint.
 *
 * The way it is done:
 *	- add an int3 trap to the address that will be patched
 *	- sync cores
 *	- update all but the first byte of the patched range
 *	- sync cores
 *	- replace the first byte (int3) by the first byte of the
 *	  replacing opcode
 *	- sync cores
 *
 * Note: must be called under text_mutex.
 */
void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
	unsigned char int3 = 0xcc;

	bp_int3_handler = handler;
	bp_int3_addr = (u8 *)addr + sizeof(int3);
	bp_patching_in_progress = true;
	/*
	 * The corresponding read barrier is in the int3 notifier; it makes
	 * sure the in_progress flag is correctly ordered wrt. patching.
	 */
	smp_wmb();

	text_poke(addr, &int3, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	if (len - sizeof(int3) > 0) {
		/* patch all but the first byte */
		text_poke((char *)addr + sizeof(int3),
			  (const char *) opcode + sizeof(int3),
			  len - sizeof(int3));
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		on_each_cpu(do_sync_core, NULL, 1);
	}

	/* patch the first byte */
	text_poke(addr, opcode, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	bp_patching_in_progress = false;
	smp_wmb();

	return addr;
}
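
/*
 * Illustrative, hypothetical caller ("ip" and "target" are placeholders):
 * patching a 5-byte nop at ip into a relative jump to target, essentially
 * what a jump-label update does:
 *
 *	unsigned char jmp[5] = { 0xe9, };	(0xe9 = jmp rel32)
 *	*(s32 *)&jmp[1] = (s32)(target - (ip + 5));
 *	text_poke_bp(ip, jmp, 5, target);
 *
 * While the patch is in flight, a CPU that hits the int3 at ip is diverted
 * straight to target by poke_int3_handler(), which matches the post-patch
 * behaviour of the finished jump.
 */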