ptrace.c 42.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4
/* By Ross Biro 1/23/92 */
/*
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
5 6 7
 *
 * BTS tracing
 *	Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
L
Linus Torvalds 已提交
8 9 10 11 12 13 14 15
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
16
#include <linux/regset.h>
R
Roland McGrath 已提交
17
#include <linux/tracehook.h>
L
Linus Torvalds 已提交
18
#include <linux/user.h>
R
Roland McGrath 已提交
19
#include <linux/elf.h>
L
Linus Torvalds 已提交
20 21 22
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/seccomp.h>
23
#include <linux/signal.h>
24
#include <linux/workqueue.h>
25 26
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
L
Linus Torvalds 已提交
27 28 29 30 31 32 33 34 35

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>
36 37
#include <asm/prctl.h>
#include <asm/proto.h>
38
#include <asm/ds.h>
39
#include <asm/hw_breakpoint.h>
40

R
Roland McGrath 已提交
41 42
#include "tls.h"

43 44 45
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

R
Roland McGrath 已提交
46 47 48 49
/*
 * Identifiers for the x86 register sets handled by this file.
 * NOTE(review): presumably used as indices into the regset arrays defined
 * further down in this file - confirm against those tables.
 */
enum x86_regset {
	REGSET_GENERAL,
	REGSET_FP,
	REGSET_XFP,
	/* Deliberately shares its numeric value with REGSET_XFP. */
	REGSET_IOPERM64 = REGSET_XFP,
	REGSET_TLS,
	REGSET_IOPERM32,
};
54

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/* Associates a register name with its byte offset inside struct pt_regs. */
struct pt_regs_offset {
	const char *name;	/* register name; NULL marks the end of a table */
	int offset;		/* offsetof(struct pt_regs, <register>) */
};

/* Builds a table entry for pt_regs member @r, using the member name as the string. */
#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
/* Table terminator: the lookup loops stop at the first entry with a NULL name. */
#define REG_OFFSET_END {.name = NULL, .offset = 0}

/*
 * Name -> offset table for the members of struct pt_regs, searched linearly
 * by regs_query_register_offset() and regs_query_register_name().
 * The set of entries depends on the configuration: r8-r15 exist only for
 * CONFIG_X86_64, the ds/es/fs/gs entries only for CONFIG_X86_32.
 */
static const struct pt_regs_offset regoffset_table[] = {
#ifdef CONFIG_X86_64
	REG_OFFSET_NAME(r15),
	REG_OFFSET_NAME(r14),
	REG_OFFSET_NAME(r13),
	REG_OFFSET_NAME(r12),
	REG_OFFSET_NAME(r11),
	REG_OFFSET_NAME(r10),
	REG_OFFSET_NAME(r9),
	REG_OFFSET_NAME(r8),
#endif
	REG_OFFSET_NAME(bx),
	REG_OFFSET_NAME(cx),
	REG_OFFSET_NAME(dx),
	REG_OFFSET_NAME(si),
	REG_OFFSET_NAME(di),
	REG_OFFSET_NAME(bp),
	REG_OFFSET_NAME(ax),
#ifdef CONFIG_X86_32
	REG_OFFSET_NAME(ds),
	REG_OFFSET_NAME(es),
	REG_OFFSET_NAME(fs),
	REG_OFFSET_NAME(gs),
#endif
	REG_OFFSET_NAME(orig_ax),
	REG_OFFSET_NAME(ip),
	REG_OFFSET_NAME(cs),
	REG_OFFSET_NAME(flags),
	REG_OFFSET_NAME(sp),
	REG_OFFSET_NAME(ss),
	/* Sentinel: must stay last, the lookup loops rely on it. */
	REG_OFFSET_END,
};

/**
 * regs_query_register_offset() - look up a register's pt_regs offset by name
 * @name:	register name to search for, as spelled in regoffset_table
 *
 * Walks regoffset_table until the NULL-named sentinel and compares each
 * entry's name with @name.
 *
 * Returns the byte offset of the register inside struct pt_regs, or
 * -EINVAL when no entry matches.
 */
int regs_query_register_offset(const char *name)
{
	const struct pt_regs_offset *entry = regoffset_table;

	while (entry->name != NULL) {
		if (strcmp(entry->name, name) == 0)
			return entry->offset;
		entry++;
	}

	return -EINVAL;
}

/**
 * regs_query_register_name() - look up a register's name by pt_regs offset
 * @offset:	byte offset of a register inside struct pt_regs
 *
 * Walks regoffset_table until the NULL-named sentinel and compares each
 * entry's offset with @offset.
 *
 * Returns the register name, or NULL when no entry has that offset.
 */
const char *regs_query_register_name(unsigned int offset)
{
	const struct pt_regs_offset *entry = regoffset_table;

	while (entry->name != NULL) {
		if (entry->offset == offset)
			return entry->name;
		entry++;
	}

	return NULL;
}

/*
 * pt_regs offsets of the registers that hold the leading function-call
 * arguments, indexed by argument number: three entries on CONFIG_X86_32,
 * six otherwise. regs_get_argument_nth() fetches any argument beyond the
 * end of this table from the kernel stack instead.
 */
static const int arg_offs_table[] = {
#ifdef CONFIG_X86_32
	[0] = offsetof(struct pt_regs, ax),
	[1] = offsetof(struct pt_regs, dx),
	[2] = offsetof(struct pt_regs, cx)
#else /* CONFIG_X86_64 */
	[0] = offsetof(struct pt_regs, di),
	[1] = offsetof(struct pt_regs, si),
	[2] = offsetof(struct pt_regs, dx),
	[3] = offsetof(struct pt_regs, cx),
	[4] = offsetof(struct pt_regs, r8),
	[5] = offsetof(struct pt_regs, r9)
#endif
};

/**
 * regs_get_argument_nth() - fetch the Nth argument of a function call
 * @regs:	pt_regs captured at function entry
 * @n:		zero-based argument number
 *
 * Arguments covered by arg_offs_table are read straight out of @regs;
 * later ones are read from the kernel stack through
 * regs_get_kernel_stack_nth(). The result is only meaningful at function
 * entry, before the callee has modified its stack.
 *
 * Returns the argument value; the value for stack arguments is whatever
 * regs_get_kernel_stack_nth() reports.
 */
unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
{
	const unsigned int nr_reg_args = ARRAY_SIZE(arg_offs_table);

	if (n >= nr_reg_args) {
		/*
		 * Argument lives on the stack. Slot 0 holds the return
		 * address, hence the extra +1.
		 */
		return regs_get_kernel_stack_nth(regs, n - nr_reg_args + 1);
	}

	return *(unsigned long *)((char *)regs + arg_offs_table[n]);
}

L
Linus Torvalds 已提交
167 168 169 170 171
/*
 * does not yet catch signals sent when the child dies.
 * in exit.c or in signal.c.
 */

172 173 174
/*
 * Determines which flags the user has access to [1 = access, 0 = no access].
 */
R
Roland McGrath 已提交
175 176 177 178 179 180 181
/*
 * Base set of user-accessible flag bits. Both FLAG_MASK definitions below
 * are built from it; the 64-bit one additionally allows X86_EFLAGS_NT.
 */
#define FLAG_MASK_32		((unsigned long)			\
				 (X86_EFLAGS_CF | X86_EFLAGS_PF |	\
				  X86_EFLAGS_AF | X86_EFLAGS_ZF |	\
				  X86_EFLAGS_SF | X86_EFLAGS_TF |	\
				  X86_EFLAGS_DF | X86_EFLAGS_OF |	\
				  X86_EFLAGS_RF | X86_EFLAGS_AC))

182 183 184 185 186 187 188 189 190 191
/*
 * Decides whether @value may NOT be installed in a segment register.
 * A null selector (0) is accepted here; any other selector is accepted
 * only when its RPL bits equal USER_RPL.
 */
static inline bool invalid_selector(u16 value)
{
	const bool wrong_rpl = (value & SEGMENT_RPL_MASK) != USER_RPL;

	return unlikely(value != 0 && wrong_rpl);
}

#ifdef CONFIG_X86_32

R
Roland McGrath 已提交
192
#define FLAG_MASK		FLAG_MASK_32
L
Linus Torvalds 已提交
193

194
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
L
Linus Torvalds 已提交
195
{
196
	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
197
	return &regs->bx + (regno >> 2);
L
Linus Torvalds 已提交
198 199
}

200
static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
L
Linus Torvalds 已提交
201
{
202 203 204 205 206 207 208 209
	/*
	 * Returning the value truncates it to 16 bits.
	 */
	unsigned int retval;
	if (offset != offsetof(struct user_regs_struct, gs))
		retval = *pt_regs_access(task_pt_regs(task), offset);
	else {
		if (task == current)
T
Tejun Heo 已提交
210 211 212
			retval = get_user_gs(task_pt_regs(task));
		else
			retval = task_user_gs(task);
213 214 215 216 217 218 219 220 221 222
	}
	return retval;
}

/*
 * Installs selector @value (already truncated to 16 bits by the u16
 * parameter) in the segment register identified by @offset.
 *
 * Returns 0 on success, -EIO when the selector is rejected.
 */
static int set_segment_reg(struct task_struct *task,
			   unsigned long offset, u16 value)
{
	const bool is_cs_or_ss =
		offset == offsetof(struct user_regs_struct, cs) ||
		offset == offsetof(struct user_regs_struct, ss);

	if (invalid_selector(value))
		return -EIO;

	/* %gs is kept outside the saved frame and needs its own accessors. */
	if (offset == offsetof(struct user_regs_struct, gs)) {
		if (task == current)
			set_user_gs(task_pt_regs(task), value);
		else
			task_user_gs(task) = value;
		return 0;
	}

	/*
	 * For %cs and %ss we cannot permit a null selector.
	 * We can permit a bogus selector as long as it has USER_RPL.
	 * Null selectors are fine for other segment registers, but
	 * we will never get back to user mode with invalid %cs or %ss
	 * and will take the trap in iret instead.  Much code relies
	 * on user_mode() to distinguish a user trap frame (which can
	 * safely use invalid selectors) from a kernel trap frame.
	 */
	if (is_cs_or_ss && unlikely(value == 0))
		return -EIO;

	*pt_regs_access(task_pt_regs(task), offset) = value;

	return 0;
}

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
#else  /* CONFIG_X86_64 */

#define FLAG_MASK		(FLAG_MASK_32 | X86_EFLAGS_NT)

/*
 * Translates a byte offset into struct pt_regs into a pointer to the
 * corresponding word. Relies on r15 being the first member (checked at
 * build time) and on every slot having the size of r15.
 */
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset)
{
	unsigned long word_index = offset / sizeof(regs->r15);

	BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0);

	return &regs->r15 + word_index;
}

static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
{
	/*
	 * Returning the value truncates it to 16 bits.
	 */
	unsigned int seg;

	switch (offset) {
	case offsetof(struct user_regs_struct, fs):
		if (task == current) {
			/* Older gas can't assemble movq %?s,%r?? */
			asm("movl %%fs,%0" : "=r" (seg));
			return seg;
		}
		return task->thread.fsindex;
	case offsetof(struct user_regs_struct, gs):
		if (task == current) {
			asm("movl %%gs,%0" : "=r" (seg));
			return seg;
		}
		return task->thread.gsindex;
	case offsetof(struct user_regs_struct, ds):
		if (task == current) {
			asm("movl %%ds,%0" : "=r" (seg));
			return seg;
		}
		return task->thread.ds;
	case offsetof(struct user_regs_struct, es):
		if (task == current) {
			asm("movl %%es,%0" : "=r" (seg));
			return seg;
		}
		return task->thread.es;

	case offsetof(struct user_regs_struct, cs):
	case offsetof(struct user_regs_struct, ss):
		break;
	}
	return *pt_regs_access(task_pt_regs(task), offset);
}

/*
 * Installs selector @value (already truncated to 16 bits by the u16
 * parameter) in the segment register identified by @offset.
 *
 * fs/gs/ds/es are recorded in task->thread and additionally loaded into
 * the CPU when @task is the current task. cs and ss are only written to
 * the saved frame for TIF_IA32 tasks (CONFIG_IA32_EMULATION); otherwise
 * the request is accepted without any effect.
 *
 * Returns 0 on success, -EIO when the selector is rejected.
 */
static int set_segment_reg(struct task_struct *task,
			   unsigned long offset, u16 value)
{
	struct thread_struct *thread = &task->thread;
	const bool is_current = (task == current);

	if (invalid_selector(value))
		return -EIO;

	switch (offset) {
	case offsetof(struct user_regs_struct, fs): {
		/*
		 * If this is setting fs as for normal 64-bit use but
		 * setting fs_base has implicitly changed it, leave it.
		 */
		const bool tls_sel_with_base = value == FS_TLS_SEL &&
			thread->fsindex == 0 && thread->fs != 0;
		const bool null_sel_with_tls = value == 0 &&
			thread->fsindex == FS_TLS_SEL && thread->fs == 0;

		if (tls_sel_with_base || null_sel_with_tls)
			break;
		thread->fsindex = value;
		if (is_current)
			loadsegment(fs, task->thread.fsindex);
		break;
	}

	case offsetof(struct user_regs_struct, gs): {
		/*
		 * If this is setting gs as for normal 64-bit use but
		 * setting gs_base has implicitly changed it, leave it.
		 */
		const bool tls_sel_with_base = value == GS_TLS_SEL &&
			thread->gsindex == 0 && thread->gs != 0;
		const bool null_sel_with_tls = value == 0 &&
			thread->gsindex == GS_TLS_SEL && thread->gs == 0;

		if (tls_sel_with_base || null_sel_with_tls)
			break;
		thread->gsindex = value;
		if (is_current)
			load_gs_index(task->thread.gsindex);
		break;
	}

	case offsetof(struct user_regs_struct, ds):
		thread->ds = value;
		if (is_current)
			loadsegment(ds, task->thread.ds);
		break;

	case offsetof(struct user_regs_struct, es):
		thread->es = value;
		if (is_current)
			loadsegment(es, task->thread.es);
		break;

	/*
	 * Can't actually change these in 64-bit mode.
	 */
	case offsetof(struct user_regs_struct, cs):
		if (unlikely(value == 0))
			return -EIO;
#ifdef CONFIG_IA32_EMULATION
		if (test_tsk_thread_flag(task, TIF_IA32))
			task_pt_regs(task)->cs = value;
#endif
		break;

	case offsetof(struct user_regs_struct, ss):
		if (unlikely(value == 0))
			return -EIO;
#ifdef CONFIG_IA32_EMULATION
		if (test_tsk_thread_flag(task, TIF_IA32))
			task_pt_regs(task)->ss = value;
#endif
		break;
	}

	return 0;
}

#endif	/* CONFIG_X86_32 */

381
static unsigned long get_flags(struct task_struct *task)
L
Linus Torvalds 已提交
382
{
383 384 385 386 387 388 389
	unsigned long retval = task_pt_regs(task)->flags;

	/*
	 * If the debugger set TF, hide it from the readout.
	 */
	if (test_tsk_thread_flag(task, TIF_FORCED_TF))
		retval &= ~X86_EFLAGS_TF;
L
Linus Torvalds 已提交
390 391 392 393

	return retval;
}

394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
static int set_flags(struct task_struct *task, unsigned long value)
{
	struct pt_regs *regs = task_pt_regs(task);

	/*
	 * If the user value contains TF, mark that
	 * it was not "us" (the debugger) that set it.
	 * If not, make sure it stays set if we had.
	 */
	if (value & X86_EFLAGS_TF)
		clear_tsk_thread_flag(task, TIF_FORCED_TF);
	else if (test_tsk_thread_flag(task, TIF_FORCED_TF))
		value |= X86_EFLAGS_TF;

	regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);

	return 0;
}

/*
 * Writes @value to the register of @child located at @offset within
 * struct user_regs_struct.
 *
 * Segment registers, flags and (64-bit only) fs_base/gs_base go through
 * their dedicated setters; every other offset is stored directly in the
 * saved pt_regs frame.
 *
 * Returns 0 on success or a negative errno from the dedicated setter.
 */
static int putreg(struct task_struct *child,
		  unsigned long offset, unsigned long value)
{
	switch (offset) {
	case offsetof(struct user_regs_struct, cs):
	case offsetof(struct user_regs_struct, ds):
	case offsetof(struct user_regs_struct, es):
	case offsetof(struct user_regs_struct, fs):
	case offsetof(struct user_regs_struct, gs):
	case offsetof(struct user_regs_struct, ss):
		return set_segment_reg(child, offset, value);

	case offsetof(struct user_regs_struct, flags):
		return set_flags(child, value);

#ifdef CONFIG_X86_64
	case offsetof(struct user_regs_struct, fs_base):
		if (value >= TASK_SIZE_OF(child))
			return -EIO;
		/*
		 * When changing the segment base, use do_arch_prctl
		 * to set either thread.fs or thread.fsindex and the
		 * corresponding GDT slot.
		 */
		if (child->thread.fs == value)
			return 0;
		return do_arch_prctl(child, ARCH_SET_FS, value);

	case offsetof(struct user_regs_struct, gs_base):
		/*
		 * Exactly the same here as the %fs handling above.
		 */
		if (value >= TASK_SIZE_OF(child))
			return -EIO;
		if (child->thread.gs == value)
			return 0;
		return do_arch_prctl(child, ARCH_SET_GS, value);
#endif
	}

	/* Plain register: store it in the saved frame. */
	*pt_regs_access(task_pt_regs(child), offset) = value;

	return 0;
}

/*
 * Reads the register of @task located at @offset within
 * struct user_regs_struct.
 *
 * Segment registers, flags and (64-bit only) fs_base/gs_base go through
 * dedicated code; every other offset is read directly from the saved
 * pt_regs frame.
 */
static unsigned long getreg(struct task_struct *task, unsigned long offset)
{
	switch (offset) {
	case offsetof(struct user_regs_struct, cs):
	case offsetof(struct user_regs_struct, ds):
	case offsetof(struct user_regs_struct, es):
	case offsetof(struct user_regs_struct, fs):
	case offsetof(struct user_regs_struct, gs):
	case offsetof(struct user_regs_struct, ss):
		return get_segment_reg(task, offset);

	case offsetof(struct user_regs_struct, flags):
		return get_flags(task);

#ifdef CONFIG_X86_64
	case offsetof(struct user_regs_struct, fs_base): {
		/*
		 * do_arch_prctl may have used a GDT slot instead of
		 * the MSR.  To userland, it appears the same either
		 * way, except the %fs segment selector might not be 0.
		 */
		unsigned int sel = task->thread.fsindex;

		if (task->thread.fs != 0)
			return task->thread.fs;
		if (task == current)
			asm("movl %%fs,%0" : "=r" (sel));
		if (sel == FS_TLS_SEL)
			return get_desc_base(&task->thread.tls_array[FS_TLS]);
		return 0;
	}

	case offsetof(struct user_regs_struct, gs_base): {
		/*
		 * Exactly the same here as the %fs handling above.
		 */
		unsigned int sel = task->thread.gsindex;

		if (task->thread.gs != 0)
			return task->thread.gs;
		if (task == current)
			asm("movl %%gs,%0" : "=r" (sel));
		if (sel == GS_TLS_SEL)
			return get_desc_base(&task->thread.tls_array[GS_TLS]);
		return 0;
	}
#endif
	}

	/* Plain register: read it from the saved frame. */
	return *pt_regs_access(task_pt_regs(task), offset);
}

505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
/*
 * Copies @count bytes of general registers of @target, starting at byte
 * offset @pos, into @kbuf if it is non-NULL and into user buffer @ubuf
 * otherwise. Registers are fetched one unsigned long at a time through
 * getreg().
 *
 * NOTE(review): the loops assume @count is a multiple of
 * sizeof(unsigned long) - presumably guaranteed by the caller; confirm.
 *
 * Returns 0 on success, -EFAULT if a store to user space fails.
 */
static int genregs_get(struct task_struct *target,
		       const struct user_regset *regset,
		       unsigned int pos, unsigned int count,
		       void *kbuf, void __user *ubuf)
{
	const unsigned int step = sizeof(unsigned long);

	if (kbuf) {
		unsigned long *dst = kbuf;

		for (; count > 0; count -= step, pos += step)
			*dst++ = getreg(target, pos);
	} else {
		unsigned long __user *udst = ubuf;

		for (; count > 0; count -= step, pos += step, udst++) {
			if (__put_user(getreg(target, pos), udst))
				return -EFAULT;
		}
	}

	return 0;
}

/*
 * Writes @count bytes of general registers of @target, starting at byte
 * offset @pos, taking the values from @kbuf if it is non-NULL and from
 * user buffer @ubuf otherwise. Registers are stored one unsigned long at
 * a time through putreg(); processing stops at the first failure.
 *
 * NOTE(review): the loops assume @count is a multiple of
 * sizeof(unsigned long) - presumably guaranteed by the caller; confirm.
 *
 * Returns 0 on success, otherwise the first error reported by
 * __get_user() or putreg().
 */
static int genregs_set(struct task_struct *target,
		       const struct user_regset *regset,
		       unsigned int pos, unsigned int count,
		       const void *kbuf, const void __user *ubuf)
{
	const unsigned int step = sizeof(unsigned long);
	int ret = 0;

	if (kbuf) {
		const unsigned long *src = kbuf;

		for (; count > 0; count -= step, pos += step) {
			ret = putreg(target, pos, *src++);
			if (ret)
				break;
		}
	} else {
		const unsigned long __user *usrc = ubuf;

		for (; count > 0; count -= step, pos += step) {
			unsigned long word;

			ret = __get_user(word, usrc++);
			if (ret)
				break;
			ret = putreg(target, pos, word);
			if (ret)
				break;
		}
	}

	return ret;
}

558 559 560
static void ptrace_triggered(struct perf_event *bp, int nmi,
			     struct perf_sample_data *data,
			     struct pt_regs *regs)
561
{
562
	int i;
563
	struct thread_struct *thread = &(current->thread);
564

565 566 567 568
	/*
	 * Store in the virtual DR6 register the fact that the breakpoint
	 * was hit so the thread's debugger will see it.
	 */
569 570
	for (i = 0; i < HBP_NUM; i++) {
		if (thread->ptrace_bps[i] == bp)
571
			break;
572
	}
573

574 575
	thread->debugreg6 |= (DR_TRAP0 << i);
}
576 577

/*
578 579 580
 * Walk through every ptrace breakpoints for this thread and
 * build the dr7 value on top of their attributes.
 *
581
 */
582
static unsigned long ptrace_get_dr7(struct perf_event *bp[])
583
{
584 585 586 587 588 589 590 591 592
	int i;
	int dr7 = 0;
	struct arch_hw_breakpoint *info;

	for (i = 0; i < HBP_NUM; i++) {
		if (bp[i] && !bp[i]->attr.disabled) {
			info = counter_arch_bp(bp[i]);
			dr7 |= encode_dr7(i, info->len, info->type);
		}
593
	}
594 595

	return dr7;
596 597
}

598 599
static struct perf_event *
ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
600
			 struct task_struct *tsk, int disabled)
601 602 603
{
	int err;
	int gen_len, gen_type;
604
	struct perf_event_attr attr;
605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620

	/*
	 * We shoud have at least an inactive breakpoint at this
	 * slot. It means the user is writing dr7 without having
	 * written the address register first
	 */
	if (!bp)
		return ERR_PTR(-EINVAL);

	err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
	if (err)
		return ERR_PTR(err);

	attr = bp->attr;
	attr.bp_len = gen_len;
	attr.bp_type = gen_type;
621
	attr.disabled = disabled;
622

623
	return modify_user_hw_breakpoint(bp, &attr);
624 625
}

626 627 628 629
/*
 * Handle ptrace writes to debug register 7.
 */
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
630
{
631
	struct thread_struct *thread = &(tsk->thread);
632
	unsigned long old_dr7;
633 634 635
	int i, orig_ret = 0, rc = 0;
	int enabled, second_pass = 0;
	unsigned len, type;
636
	struct perf_event *bp;
637 638

	data &= ~DR_CONTROL_RESERVED;
639
	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
640 641 642 643 644 645 646
restore:
	/*
	 * Loop through all the hardware breakpoints, making the
	 * appropriate changes to each.
	 */
	for (i = 0; i < HBP_NUM; i++) {
		enabled = decode_dr7(data, i, &len, &type);
647
		bp = thread->ptrace_bps[i];
648 649 650

		if (!enabled) {
			if (bp) {
651 652
				/*
				 * Don't unregister the breakpoints right-away,
653 654 655 656 657 658 659
				 * unless all register_user_hw_breakpoint()
				 * requests have succeeded. This prevents
				 * any window of opportunity for debug
				 * register grabbing by other users.
				 */
				if (!second_pass)
					continue;
660

661
				thread->ptrace_bps[i] = NULL;
662 663 664 665 666 667 668 669
				bp = ptrace_modify_breakpoint(bp, len, type,
							      tsk, 1);
				if (IS_ERR(bp)) {
					rc = PTR_ERR(bp);
					thread->ptrace_bps[i] = NULL;
					break;
				}
				thread->ptrace_bps[i] = bp;
670 671 672
			}
			continue;
		}
673

674
		bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
675

676
		/* Incorrect bp, or we have a bug in bp API */
677 678
		if (IS_ERR(bp)) {
			rc = PTR_ERR(bp);
679
			thread->ptrace_bps[i] = NULL;
680 681 682
			break;
		}
		thread->ptrace_bps[i] = bp;
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697
	}
	/*
	 * Make a second pass to free the remaining unused breakpoints
	 * or to restore the original breakpoints if an error occurred.
	 */
	if (!second_pass) {
		second_pass = 1;
		if (rc < 0) {
			orig_ret = rc;
			data = old_dr7;
		}
		goto restore;
	}
	return ((orig_ret < 0) ? orig_ret : rc);
}
698

699 700 701
/*
 * Handle PTRACE_PEEKUSR calls for the debug register area.
 */
702
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
703 704 705 706
{
	struct thread_struct *thread = &(tsk->thread);
	unsigned long val = 0;

707 708 709 710 711 712 713
	if (n < HBP_NUM) {
		struct perf_event *bp;
		bp = thread->ptrace_bps[n];
		if (!bp)
			return 0;
		val = bp->hw.info.address;
	} else if (n == 6) {
714
		val = thread->debugreg6;
715 716 717
	 } else if (n == 7) {
		val = ptrace_get_dr7(thread->ptrace_bps);
	}
718 719
	return val;
}
720

721 722 723 724 725
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
				      unsigned long addr)
{
	struct perf_event *bp;
	struct thread_struct *t = &tsk->thread;
726
	struct perf_event_attr attr;
727 728

	if (!t->ptrace_bps[nr]) {
729
		hw_breakpoint_init(&attr);
730
		/*
731 732
		 * Put stub len and type to register (reserve) an inactive but
		 * correct bp
733
		 */
734 735 736 737 738 739
		attr.bp_addr = addr;
		attr.bp_len = HW_BREAKPOINT_LEN_1;
		attr.bp_type = HW_BREAKPOINT_W;
		attr.disabled = 1;

		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
740 741 742
	} else {
		bp = t->ptrace_bps[nr];
		t->ptrace_bps[nr] = NULL;
743 744 745

		attr = bp->attr;
		attr.bp_addr = addr;
746
		bp = modify_user_hw_breakpoint(bp, &attr);
747
	}
748 749 750 751 752 753 754 755 756 757 758 759 760
	/*
	 * CHECKME: the previous code returned -EIO if the addr wasn't a
	 * valid task virtual addr. The new one will return -EINVAL in this
	 * case.
	 * -EINVAL may be what we want for in-kernel breakpoints users, but
	 * -EIO looks better for ptrace, since we refuse a register writing
	 * for the user. And anyway this is the previous behaviour.
	 */
	if (IS_ERR(bp))
		return PTR_ERR(bp);

	t->ptrace_bps[nr] = bp;

761 762 763
	return 0;
}

764 765 766 767 768 769 770 771 772 773 774 775 776
/*
 * Handle PTRACE_POKEUSR calls for the debug register area.
 */
int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
{
	struct thread_struct *thread = &(tsk->thread);
	int rc = 0;

	/* There are no DR4 or DR5 registers */
	if (n == 4 || n == 5)
		return -EIO;

	if (n == 6) {
777
		thread->debugreg6 = val;
778
		goto ret_path;
779
	}
780
	if (n < HBP_NUM) {
781 782 783
		rc = ptrace_set_breakpoint_addr(tsk, n, val);
		if (rc)
			return rc;
784 785 786 787
	}
	/* All that's left is DR7 */
	if (n == 7)
		rc = ptrace_write_dr7(tsk, val);
788

789 790
ret_path:
	return rc;
791 792
}

R
Roland McGrath 已提交
793 794 795 796 797 798 799 800 801
/*
 * These access the current or another (stopped) task's io permission
 * bitmap for debugging or core dump.
 */
static int ioperm_active(struct task_struct *target,
			 const struct user_regset *regset)
{
	return target->thread.io_bitmap_max / regset->size;
}
802

R
Roland McGrath 已提交
803 804 805 806
/*
 * Copies the io permission bitmap of @target out through
 * user_regset_copyout(). The source region is bytes 0..IO_BITMAP_BYTES
 * of the thread's bitmap.
 *
 * Returns -ENXIO when the task has no io bitmap, otherwise the result of
 * user_regset_copyout().
 */
static int ioperm_get(struct task_struct *target,
		      const struct user_regset *regset,
		      unsigned int pos, unsigned int count,
		      void *kbuf, void __user *ubuf)
{
	const void *bitmap = target->thread.io_bitmap_ptr;

	if (!bitmap)
		return -ENXIO;

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				   bitmap, 0, IO_BITMAP_BYTES);
}

M
Markus Metzger 已提交
816
#ifdef CONFIG_X86_PTRACE_BTS
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
/*
 * A branch trace store context.
 *
 * Contexts may only be installed by ptrace_bts_config() and only for
 * ptraced tasks.
 *
 * Contexts are destroyed when the tracee is detached from the tracer.
 * The actual destruction work requires interrupts enabled, so the
 * work is deferred and will be scheduled during __ptrace_unlink().
 *
 * Contexts hold an additional task_struct reference on the traced
 * task, as well as a reference on the tracer's mm.
 *
 * Ptrace already holds a task_struct for the duration of ptrace operations,
 * but since destruction is deferred, it may be executed after both
 * tracer and tracee exited.
 */
/* Per-task branch trace store state, reachable through task->bts. */
struct bts_context {
	/* The branch trace handle; NULL while no tracer is installed. */
	struct bts_tracer	*tracer;

	/*
	 * The buffer used to store the branch trace and its size in bytes.
	 * Set up by alloc_bts_buffer(), torn down by free_bts_buffer().
	 */
	void			*buffer;
	unsigned int		size;

	/*
	 * The mm that paid for the above buffer. The reference is taken
	 * in alloc_bts_buffer() and dropped in free_bts_buffer().
	 */
	struct mm_struct	*mm;

	/*
	 * The task this context belongs to. The reference is taken in
	 * alloc_bts_context() and dropped in free_bts_context_work().
	 */
	struct task_struct	*task;

	/* The signal to send on a bts buffer overflow. */
	unsigned int		bts_ovfl_signal;

	/* The work struct to destroy a context (see free_bts_context()). */
	struct work_struct	work;
};

855
static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
856
{
857 858
	void *buffer = NULL;
	int err = -ENOMEM;
859

860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876
	err = account_locked_memory(current->mm, current->signal->rlim, size);
	if (err < 0)
		return err;

	buffer = kzalloc(size, GFP_KERNEL);
	if (!buffer)
		goto out_refund;

	context->buffer = buffer;
	context->size = size;
	context->mm = get_task_mm(current);

	return 0;

 out_refund:
	refund_locked_memory(current->mm, size);
	return err;
877 878 879 880 881 882 883 884 885 886
}

/*
 * Releases the trace buffer of @context, if any: frees the memory,
 * refunds the locked-memory accounting to the mm that paid for it and
 * drops the reference on that mm. The buffer, size and mm fields are
 * reset afterwards. Does nothing when no buffer is installed.
 */
static inline void free_bts_buffer(struct bts_context *context)
{
	void *buffer = context->buffer;

	if (buffer == NULL)
		return;

	kfree(buffer);
	context->buffer = NULL;

	refund_locked_memory(context->mm, context->size);
	context->size = 0;

	mmput(context->mm);
	context->mm = NULL;
}

/*
 * Work handler that destroys the bts_context embedding @w: releases the
 * tracer, drops the task reference, frees the trace buffer and finally
 * the context itself.
 */
static void free_bts_context_work(struct work_struct *w)
{
	struct bts_context *context =
		container_of(w, struct bts_context, work);

	ds_release_bts(context->tracer);
	put_task_struct(context->task);
	free_bts_buffer(context);
	kfree(context);
}

/*
 * Queues the destruction of @context: the actual teardown is done by
 * free_bts_context_work() once the scheduled work item runs, not by this
 * function itself.
 */
static inline void free_bts_context(struct bts_context *context)
{
	INIT_WORK(&context->work, free_bts_context_work);
	schedule_work(&context->work);
}

static inline struct bts_context *alloc_bts_context(struct task_struct *task)
{
	struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (context) {
		context->task = task;
		task->bts = context;

		get_task_struct(task);
	}

	return context;
}

M
Markus Metzger 已提交
925
static int ptrace_bts_read_record(struct task_struct *child, size_t index,
926 927
				  struct bts_struct __user *out)
{
928
	struct bts_context *context;
929 930 931
	const struct bts_trace *trace;
	struct bts_struct bts;
	const unsigned char *at;
M
Markus Metzger 已提交
932
	int error;
933

934 935 936 937 938
	context = child->bts;
	if (!context)
		return -ESRCH;

	trace = ds_read_bts(context->tracer);
939
	if (!trace)
940
		return -ESRCH;
941

942 943 944
	at = trace->ds.top - ((index + 1) * trace->ds.size);
	if ((void *)at < trace->ds.begin)
		at += (trace->ds.n * trace->ds.size);
M
Markus Metzger 已提交
945

946 947
	if (!trace->read)
		return -EOPNOTSUPP;
M
Markus Metzger 已提交
948

949
	error = trace->read(context->tracer, at, &bts);
M
Markus Metzger 已提交
950 951
	if (error < 0)
		return error;
952

953
	if (copy_to_user(out, &bts, sizeof(bts)))
954 955
		return -EFAULT;

956
	return sizeof(bts);
957 958
}

M
Markus Metzger 已提交
959
static int ptrace_bts_drain(struct task_struct *child,
960
			    long size,
M
Markus Metzger 已提交
961 962
			    struct bts_struct __user *out)
{
963
	struct bts_context *context;
964 965 966
	const struct bts_trace *trace;
	const unsigned char *at;
	int error, drained = 0;
967

968 969 970 971 972
	context = child->bts;
	if (!context)
		return -ESRCH;

	trace = ds_read_bts(context->tracer);
973
	if (!trace)
974
		return -ESRCH;
M
Markus Metzger 已提交
975

976 977 978 979
	if (!trace->read)
		return -EOPNOTSUPP;

	if (size < (trace->ds.top - trace->ds.begin))
980 981
		return -EIO;

982 983 984
	for (at = trace->ds.begin; (void *)at < trace->ds.top;
	     out++, drained++, at += trace->ds.size) {
		struct bts_struct bts;
M
Markus Metzger 已提交
985

986
		error = trace->read(context->tracer, at, &bts);
987 988
		if (error < 0)
			return error;
M
Markus Metzger 已提交
989

990
		if (copy_to_user(out, &bts, sizeof(bts)))
M
Markus Metzger 已提交
991 992 993
			return -EFAULT;
	}

994 995
	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);

996
	error = ds_reset_bts(context->tracer);
M
Markus Metzger 已提交
997 998
	if (error < 0)
		return error;
M
Markus Metzger 已提交
999

1000
	return drained;
M
Markus Metzger 已提交
1001 1002 1003
}

static int ptrace_bts_config(struct task_struct *child,
1004
			     long cfg_size,
M
Markus Metzger 已提交
1005 1006
			     const struct ptrace_bts_config __user *ucfg)
{
1007
	struct bts_context *context;
M
Markus Metzger 已提交
1008
	struct ptrace_bts_config cfg;
1009
	unsigned int flags = 0;
M
Markus Metzger 已提交
1010

1011
	if (cfg_size < sizeof(cfg))
1012
		return -EIO;
1013

M
Markus Metzger 已提交
1014
	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
1015
		return -EFAULT;
1016

1017 1018 1019 1020 1021
	context = child->bts;
	if (!context)
		context = alloc_bts_context(child);
	if (!context)
		return -ENOMEM;
M
Markus Metzger 已提交
1022

1023 1024 1025
	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
		if (!cfg.signal)
			return -EINVAL;
1026

1027
		return -EOPNOTSUPP;
1028
		context->bts_ovfl_signal = cfg.signal;
1029
	}
1030

1031 1032
	ds_release_bts(context->tracer);
	context->tracer = NULL;
1033

1034
	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
1035 1036
		int err;

1037 1038 1039
		free_bts_buffer(context);
		if (!cfg.size)
			return 0;
M
Markus Metzger 已提交
1040

1041 1042 1043
		err = alloc_bts_buffer(context, cfg.size);
		if (err < 0)
			return err;
M
Markus Metzger 已提交
1044 1045
	}

1046
	if (cfg.flags & PTRACE_BTS_O_TRACE)
1047
		flags |= BTS_USER;
1048

1049
	if (cfg.flags & PTRACE_BTS_O_SCHED)
1050
		flags |= BTS_TIMESTAMPS;
1051

1052 1053 1054
	context->tracer =
		ds_request_bts_task(child, context->buffer, context->size,
				    NULL, (size_t)-1, flags);
1055 1056
	if (unlikely(IS_ERR(context->tracer))) {
		int error = PTR_ERR(context->tracer);
1057

1058 1059
		free_bts_buffer(context);
		context->tracer = NULL;
1060 1061
		return error;
	}
1062

1063
	return sizeof(cfg);
1064 1065
}

M
Markus Metzger 已提交
1066
static int ptrace_bts_status(struct task_struct *child,
1067
			     long cfg_size,
M
Markus Metzger 已提交
1068
			     struct ptrace_bts_config __user *ucfg)
1069
{
1070
	struct bts_context *context;
1071
	const struct bts_trace *trace;
M
Markus Metzger 已提交
1072
	struct ptrace_bts_config cfg;
1073

1074 1075 1076 1077
	context = child->bts;
	if (!context)
		return -ESRCH;

1078 1079 1080
	if (cfg_size < sizeof(cfg))
		return -EIO;

1081
	trace = ds_read_bts(context->tracer);
1082
	if (!trace)
1083
		return -ESRCH;
1084

M
Markus Metzger 已提交
1085
	memset(&cfg, 0, sizeof(cfg));
1086 1087 1088
	cfg.size	= trace->ds.end - trace->ds.begin;
	cfg.signal	= context->bts_ovfl_signal;
	cfg.bts_size	= sizeof(struct bts_struct);
1089

M
Markus Metzger 已提交
1090 1091
	if (cfg.signal)
		cfg.flags |= PTRACE_BTS_O_SIGNAL;
1092

1093
	if (trace->ds.flags & BTS_USER)
M
Markus Metzger 已提交
1094 1095
		cfg.flags |= PTRACE_BTS_O_TRACE;

1096
	if (trace->ds.flags & BTS_TIMESTAMPS)
M
Markus Metzger 已提交
1097
		cfg.flags |= PTRACE_BTS_O_SCHED;
1098

M
Markus Metzger 已提交
1099 1100
	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
		return -EFAULT;
1101

M
Markus Metzger 已提交
1102
	return sizeof(cfg);
1103 1104
}

1105
static int ptrace_bts_clear(struct task_struct *child)
1106
{
1107
	struct bts_context *context;
1108
	const struct bts_trace *trace;
1109

1110 1111 1112 1113 1114
	context = child->bts;
	if (!context)
		return -ESRCH;

	trace = ds_read_bts(context->tracer);
1115
	if (!trace)
1116
		return -ESRCH;
1117

1118
	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
1119

1120
	return ds_reset_bts(context->tracer);
1121 1122
}

1123
static int ptrace_bts_size(struct task_struct *child)
1124
{
1125
	struct bts_context *context;
1126
	const struct bts_trace *trace;
M
Markus Metzger 已提交
1127

1128 1129 1130 1131 1132
	context = child->bts;
	if (!context)
		return -ESRCH;

	trace = ds_read_bts(context->tracer);
1133
	if (!trace)
1134
		return -ESRCH;
M
Markus Metzger 已提交
1135

1136
	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
M
Markus Metzger 已提交
1137
}
1138

1139 1140 1141 1142
/*
 * Called from __ptrace_unlink() after the child has been moved back
 * to its original parent.
 */
1143
void ptrace_bts_untrace(struct task_struct *child)
1144 1145
{
	if (unlikely(child->bts)) {
1146
		free_bts_context(child->bts);
1147 1148 1149
		child->bts = NULL;
	}
}
M
Markus Metzger 已提交
1150
#endif /* CONFIG_X86_PTRACE_BTS */
1151

L
Linus Torvalds 已提交
1152 1153 1154 1155 1156 1157
/*
 * Detach-time cleanup, invoked by kernel/ptrace.c.
 *
 * Turns single-stepping off for @child and, where the architecture
 * defines TIF_SYSCALL_EMU, clears that thread flag as well.
 */
void ptrace_disable(struct task_struct *child)
{
	user_disable_single_step(child);

#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
}

R
Roland McGrath 已提交
1165 1166 1167 1168
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
static const struct user_regset_view user_x86_32_view; /* Initialized below. */
#endif

1169
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
L
Linus Torvalds 已提交
1170
{
R
Roland McGrath 已提交
1171
	int ret;
L
Linus Torvalds 已提交
1172 1173 1174 1175 1176 1177 1178 1179
	unsigned long __user *datap = (unsigned long __user *)data;

	switch (request) {
	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		unsigned long tmp;

		ret = -EIO;
R
Roland McGrath 已提交
1180 1181
		if ((addr & (sizeof(data) - 1)) || addr < 0 ||
		    addr >= sizeof(struct user))
L
Linus Torvalds 已提交
1182 1183 1184
			break;

		tmp = 0;  /* Default return condition */
R
Roland McGrath 已提交
1185
		if (addr < sizeof(struct user_regs_struct))
L
Linus Torvalds 已提交
1186
			tmp = getreg(child, addr);
R
Roland McGrath 已提交
1187 1188 1189 1190
		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
			 addr <= offsetof(struct user, u_debugreg[7])) {
			addr -= offsetof(struct user, u_debugreg[0]);
			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
L
Linus Torvalds 已提交
1191 1192 1193 1194 1195 1196 1197
		}
		ret = put_user(tmp, datap);
		break;
	}

	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
		ret = -EIO;
R
Roland McGrath 已提交
1198 1199
		if ((addr & (sizeof(data) - 1)) || addr < 0 ||
		    addr >= sizeof(struct user))
L
Linus Torvalds 已提交
1200 1201
			break;

R
Roland McGrath 已提交
1202
		if (addr < sizeof(struct user_regs_struct))
L
Linus Torvalds 已提交
1203
			ret = putreg(child, addr, data);
R
Roland McGrath 已提交
1204 1205 1206 1207 1208
		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
			 addr <= offsetof(struct user, u_debugreg[7])) {
			addr -= offsetof(struct user, u_debugreg[0]);
			ret = ptrace_set_debugreg(child,
						  addr / sizeof(data), data);
L
Linus Torvalds 已提交
1209
		}
R
Roland McGrath 已提交
1210
		break;
L
Linus Torvalds 已提交
1211

R
Roland McGrath 已提交
1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
		return copy_regset_to_user(child,
					   task_user_regset_view(current),
					   REGSET_GENERAL,
					   0, sizeof(struct user_regs_struct),
					   datap);

	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
		return copy_regset_from_user(child,
					     task_user_regset_view(current),
					     REGSET_GENERAL,
					     0, sizeof(struct user_regs_struct),
					     datap);

	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
		return copy_regset_to_user(child,
					   task_user_regset_view(current),
					   REGSET_FP,
					   0, sizeof(struct user_i387_struct),
					   datap);

	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
		return copy_regset_from_user(child,
					     task_user_regset_view(current),
					     REGSET_FP,
					     0, sizeof(struct user_i387_struct),
					     datap);
L
Linus Torvalds 已提交
1239

R
Roland McGrath 已提交
1240
#ifdef CONFIG_X86_32
R
Roland McGrath 已提交
1241 1242 1243 1244
	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_XFP,
					   0, sizeof(struct user_fxsr_struct),
1245
					   datap) ? -EIO : 0;
R
Roland McGrath 已提交
1246 1247 1248 1249 1250

	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
		return copy_regset_from_user(child, &user_x86_32_view,
					     REGSET_XFP,
					     0, sizeof(struct user_fxsr_struct),
1251
					     datap) ? -EIO : 0;
R
Roland McGrath 已提交
1252
#endif
L
Linus Torvalds 已提交
1253

R
Roland McGrath 已提交
1254
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
L
Linus Torvalds 已提交
1255
	case PTRACE_GET_THREAD_AREA:
R
Roland McGrath 已提交
1256 1257 1258 1259
		if (addr < 0)
			return -EIO;
		ret = do_get_thread_area(child, addr,
					 (struct user_desc __user *) data);
L
Linus Torvalds 已提交
1260 1261 1262
		break;

	case PTRACE_SET_THREAD_AREA:
R
Roland McGrath 已提交
1263 1264 1265 1266
		if (addr < 0)
			return -EIO;
		ret = do_set_thread_area(child, addr,
					 (struct user_desc __user *) data, 0);
L
Linus Torvalds 已提交
1267
		break;
R
Roland McGrath 已提交
1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
#endif

#ifdef CONFIG_X86_64
		/* normal 64bit interface to access TLS data.
		   Works just like arch_prctl, except that the arguments
		   are reversed. */
	case PTRACE_ARCH_PRCTL:
		ret = do_arch_prctl(child, data, addr);
		break;
#endif
L
Linus Torvalds 已提交
1278

1279 1280 1281
	/*
	 * These bits need more cooking - not enabled yet:
	 */
M
Markus Metzger 已提交
1282
#ifdef CONFIG_X86_PTRACE_BTS
M
Markus Metzger 已提交
1283 1284
	case PTRACE_BTS_CONFIG:
		ret = ptrace_bts_config
1285
			(child, data, (struct ptrace_bts_config __user *)addr);
1286 1287
		break;

M
Markus Metzger 已提交
1288 1289
	case PTRACE_BTS_STATUS:
		ret = ptrace_bts_status
1290
			(child, data, (struct ptrace_bts_config __user *)addr);
1291 1292
		break;

1293 1294
	case PTRACE_BTS_SIZE:
		ret = ptrace_bts_size(child);
1295 1296
		break;

M
Markus Metzger 已提交
1297
	case PTRACE_BTS_GET:
1298
		ret = ptrace_bts_read_record
M
Markus Metzger 已提交
1299
			(child, data, (struct bts_struct __user *) addr);
1300 1301
		break;

M
Markus Metzger 已提交
1302
	case PTRACE_BTS_CLEAR:
1303
		ret = ptrace_bts_clear(child);
1304 1305
		break;

M
Markus Metzger 已提交
1306 1307
	case PTRACE_BTS_DRAIN:
		ret = ptrace_bts_drain
1308
			(child, data, (struct bts_struct __user *) addr);
1309
		break;
M
Markus Metzger 已提交
1310
#endif /* CONFIG_X86_PTRACE_BTS */
1311

L
Linus Torvalds 已提交
1312 1313 1314 1315
	default:
		ret = ptrace_request(child, request, addr, data);
		break;
	}
1316

L
Linus Torvalds 已提交
1317 1318 1319
	return ret;
}

1320 1321
#ifdef CONFIG_IA32_EMULATION

R
Roland McGrath 已提交
1322 1323 1324
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <asm/ia32.h>
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360
#include <asm/user32.h>

/*
 * Case generators for putreg32().  Each expands to one `case` label
 * keyed on the byte offset of a field inside struct user32.
 *
 * R32   stores the value into the matching struct pt_regs member.
 * SEG32 hands the value to set_segment_reg() and returns its result.
 */
#define R32(field32, field)						\
	case offsetof(struct user32, regs.field32):			\
		regs->field = value;					\
		break

#define SEG32(seg)							\
	case offsetof(struct user32, regs.seg):				\
		return set_segment_reg(child,				\
				       offsetof(struct user_regs_struct, seg), \
				       value)

/*
 * Write @value to the 32-bit USER-area word at byte offset @regno of
 * @child.
 *
 * Returns 0 on success (including writes to ignored dummy fields),
 * -EIO for a misaligned or too-large offset, or whatever
 * set_segment_reg(), set_flags() or ptrace_set_debugreg() return for
 * the fields they handle.
 */
static int putreg32(struct task_struct *child, unsigned regno, u32 value)
{
	struct pt_regs *regs = task_pt_regs(child);

	switch (regno) {

	/* Segment registers. */
	SEG32(cs);
	SEG32(ds);
	SEG32(es);
	SEG32(fs);
	SEG32(gs);
	SEG32(ss);

	/* Plain general-purpose registers. */
	R32(ebx, bx);
	R32(ecx, cx);
	R32(edx, dx);
	R32(edi, di);
	R32(esi, si);
	R32(ebp, bp);
	R32(eax, ax);
	R32(eip, ip);
	R32(esp, sp);

	case offsetof(struct user32, regs.orig_eax):
		/*
		 * When a 32-bit debugger writes orig_eax it is restoring
		 * the state of a task that restarts a 32-bit syscall.
		 * The task may no longer be sitting at the exit of a
		 * 32-bit syscall with TS_COMPAT set, so set it here to
		 * have the -ERESTART* codes interpreted correctly.
		 */
		regs->orig_ax = value;
		if (syscall_get_nr(child, regs) >= 0)
			task_thread_info(child)->status |= TS_COMPAT;
		break;

	case offsetof(struct user32, regs.eflags):
		return set_flags(child, value);

	case offsetof(struct user32, u_debugreg[0]) ...
		offsetof(struct user32, u_debugreg[7]):
		regno -= offsetof(struct user32, u_debugreg[0]);
		return ptrace_set_debugreg(child, regno / 4, value);

	default:
		/*
		 * NOTE(review): `>` lets regno == sizeof(struct user32)
		 * through, whereas the native path in arch_ptrace()
		 * rejects addr >= sizeof(struct user) - confirm whether
		 * this is intended.
		 */
		if (regno > sizeof(struct user32) || (regno & 3))
			return -EIO;

		/*
		 * Remaining dummy fields of the virtual user structure:
		 * writes are silently dropped.
		 */
		break;
	}

	return 0;
}

#undef R32
#undef SEG32

/*
 * Case generators for getreg32().  Each expands to one `case` label
 * keyed on the byte offset of a field inside struct user32.
 *
 * R32   loads the matching struct pt_regs member into *val.
 * SEG32 loads the result of get_segment_reg() into *val.
 */
#define R32(field32, field)						\
	case offsetof(struct user32, regs.field32):			\
		*val = regs->field;					\
		break

#define SEG32(seg)							\
	case offsetof(struct user32, regs.seg):				\
		*val = get_segment_reg(child,				\
				       offsetof(struct user_regs_struct, seg)); \
		break

/*
 * Read the 32-bit USER-area word at byte offset @regno of @child into
 * *@val.
 *
 * Returns 0 on success (dummy fields read as 0) and -EIO for a
 * misaligned or too-large offset, in which case *@val is not written.
 */
static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
{
	struct pt_regs *regs = task_pt_regs(child);

	switch (regno) {

	/* Segment registers fetched through get_segment_reg(). */
	SEG32(ds);
	SEG32(es);
	SEG32(fs);
	SEG32(gs);

	/* Everything that can be read straight out of pt_regs. */
	R32(cs, cs);
	R32(ss, ss);
	R32(ebx, bx);
	R32(ecx, cx);
	R32(edx, dx);
	R32(edi, di);
	R32(esi, si);
	R32(ebp, bp);
	R32(eax, ax);
	R32(orig_eax, orig_ax);
	R32(eip, ip);
	R32(esp, sp);

	case offsetof(struct user32, regs.eflags):
		*val = get_flags(child);
		break;

	case offsetof(struct user32, u_debugreg[0]) ...
		offsetof(struct user32, u_debugreg[7]):
		regno -= offsetof(struct user32, u_debugreg[0]);
		*val = ptrace_get_debugreg(child, regno / 4);
		break;

	default:
		/*
		 * NOTE(review): `>` lets regno == sizeof(struct user32)
		 * through, whereas the native path in arch_ptrace()
		 * rejects addr >= sizeof(struct user) - confirm whether
		 * this is intended.
		 */
		if (regno > sizeof(struct user32) || (regno & 3))
			return -EIO;

		/*
		 * Remaining dummy fields of the virtual user structure
		 * read back as zero.
		 */
		*val = 0;
		break;
	}

	return 0;
}

#undef R32
#undef SEG32

1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
/*
 * Regset .get handler for the 32-bit general registers.
 *
 * Copies @count bytes, one 32-bit word at a time starting at byte
 * offset @pos, into @kbuf when it is non-NULL and into the user
 * buffer @ubuf otherwise.  The return value of getreg32() is not
 * checked.
 *
 * Returns 0, or -EFAULT when a store to @ubuf fails.
 */
static int genregs32_get(struct task_struct *target,
			 const struct user_regset *regset,
			 unsigned int pos, unsigned int count,
			 void *kbuf, void __user *ubuf)
{
	if (kbuf) {
		compat_ulong_t *dst = kbuf;

		for (; count > 0; count -= sizeof(*dst), pos += sizeof(*dst))
			getreg32(target, pos, dst++);
	} else {
		compat_ulong_t __user *udst = ubuf;

		for (; count > 0;
		     count -= sizeof(*udst), pos += sizeof(*udst)) {
			compat_ulong_t word;

			getreg32(target, pos, &word);
			if (__put_user(word, udst++))
				return -EFAULT;
		}
	}

	return 0;
}

/*
 * Regset .set handler for the 32-bit general registers.
 *
 * Writes @count bytes, one 32-bit word at a time starting at byte
 * offset @pos, taken from @kbuf when it is non-NULL and from the user
 * buffer @ubuf otherwise.  Stops at the first failure.
 *
 * Returns 0 on success, otherwise the first non-zero result of
 * __get_user() or putreg32().
 */
static int genregs32_set(struct task_struct *target,
			 const struct user_regset *regset,
			 unsigned int pos, unsigned int count,
			 const void *kbuf, const void __user *ubuf)
{
	int ret = 0;

	if (kbuf) {
		const compat_ulong_t *src = kbuf;

		for (; count > 0 && !ret;
		     count -= sizeof(*src), pos += sizeof(*src))
			ret = putreg32(target, pos, *src++);
	} else {
		const compat_ulong_t __user *usrc = ubuf;

		for (; count > 0 && !ret;
		     count -= sizeof(*usrc), pos += sizeof(*usrc)) {
			compat_ulong_t word;

			ret = __get_user(word, usrc++);
			if (ret)
				break;
			ret = putreg32(target, pos, word);
		}
	}

	return ret;
}

1514 1515
/*
 * ptrace(2) entry point for 32-bit callers.
 *
 * USER-area and regset requests are served from the 32-bit layouts
 * (struct user32, user_x86_32_view).  Thread-area and BTS requests are
 * forwarded to arch_ptrace(); everything else goes to
 * compat_ptrace_request().
 */
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
			compat_ulong_t caddr, compat_ulong_t cdata)
{
	unsigned long addr = caddr;
	unsigned long data = cdata;
	void __user *datap = compat_ptr(data);
	__u32 val;
	int ret;

	switch (request) {
	case PTRACE_PEEKUSR:
		ret = getreg32(child, addr, &val);
		if (!ret)
			ret = put_user(val, (__u32 __user *)datap);
		return ret;

	case PTRACE_POKEUSR:
		return putreg32(child, addr, data);

	/* General-purpose registers. */
	case PTRACE_GETREGS:
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_GENERAL,
					   0, sizeof(struct user_regs_struct32),
					   datap);

	case PTRACE_SETREGS:
		return copy_regset_from_user(child, &user_x86_32_view,
					     REGSET_GENERAL,
					     0, sizeof(struct user_regs_struct32),
					     datap);

	/* FPU state. */
	case PTRACE_GETFPREGS:
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_FP,
					   0, sizeof(struct user_i387_ia32_struct),
					   datap);

	case PTRACE_SETFPREGS:
		return copy_regset_from_user(child, &user_x86_32_view,
					     REGSET_FP,
					     0, sizeof(struct user_i387_ia32_struct),
					     datap);

	/* Extended FPU state. */
	case PTRACE_GETFPXREGS:
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_XFP,
					   0, sizeof(struct user32_fxsr_struct),
					   datap);

	case PTRACE_SETFPXREGS:
		return copy_regset_from_user(child, &user_x86_32_view,
					     REGSET_XFP,
					     0, sizeof(struct user32_fxsr_struct),
					     datap);

	/* Requests served by the native implementation. */
	case PTRACE_GET_THREAD_AREA:
	case PTRACE_SET_THREAD_AREA:
#ifdef CONFIG_X86_PTRACE_BTS
	case PTRACE_BTS_CONFIG:
	case PTRACE_BTS_STATUS:
	case PTRACE_BTS_SIZE:
	case PTRACE_BTS_GET:
	case PTRACE_BTS_CLEAR:
	case PTRACE_BTS_DRAIN:
#endif /* CONFIG_X86_PTRACE_BTS */
		return arch_ptrace(child, request, addr, data);

	default:
		return compat_ptrace_request(child, request, addr, data);
	}
}

1588 1589
#endif	/* CONFIG_IA32_EMULATION */

R
Roland McGrath 已提交
1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604
#ifdef CONFIG_X86_64

static const struct user_regset x86_64_regsets[] = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct) / sizeof(long),
		.size = sizeof(long), .align = sizeof(long),
		.get = genregs_get, .set = genregs_set
	},
	[REGSET_FP] = {
		.core_note_type = NT_PRFPREG,
		.n = sizeof(struct user_i387_struct) / sizeof(long),
		.size = sizeof(long), .align = sizeof(long),
		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
	},
R
Roland McGrath 已提交
1605 1606 1607 1608 1609 1610
	[REGSET_IOPERM64] = {
		.core_note_type = NT_386_IOPERM,
		.n = IO_BITMAP_LONGS,
		.size = sizeof(long), .align = sizeof(long),
		.active = ioperm_active, .get = ioperm_get
	},
R
Roland McGrath 已提交
1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623
};

/* Regset view describing a native 64-bit task. */
static const struct user_regset_view user_x86_64_view = {
	.name		= "x86_64",
	.e_machine	= EM_X86_64,
	.regsets	= x86_64_regsets,
	.n		= ARRAY_SIZE(x86_64_regsets)
};

#else  /* CONFIG_X86_32 */

/*
 * 32-bit kernel: the native structures and accessors already are the
 * 32-bit ones, so map the "32" names used by x86_32_regsets onto them.
 */
#define user_regs_struct32	user_regs_struct
#define user_i387_ia32_struct	user_i387_struct
#define user32_fxsr_struct	user_fxsr_struct

#define genregs32_get		genregs_get
#define genregs32_set		genregs_set


R
Roland McGrath 已提交
1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638
#endif	/* CONFIG_X86_64 */

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
static const struct user_regset x86_32_regsets[] = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.get = genregs32_get, .set = genregs32_set
	},
	[REGSET_FP] = {
		.core_note_type = NT_PRFPREG,
1639
		.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
R
Roland McGrath 已提交
1640 1641 1642 1643 1644
		.size = sizeof(u32), .align = sizeof(u32),
		.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
	},
	[REGSET_XFP] = {
		.core_note_type = NT_PRXFPREG,
1645
		.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
R
Roland McGrath 已提交
1646 1647 1648 1649
		.size = sizeof(u32), .align = sizeof(u32),
		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
	},
	[REGSET_TLS] = {
R
Roland McGrath 已提交
1650
		.core_note_type = NT_386_TLS,
R
Roland McGrath 已提交
1651 1652 1653 1654 1655 1656
		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
		.size = sizeof(struct user_desc),
		.align = sizeof(struct user_desc),
		.active = regset_tls_active,
		.get = regset_tls_get, .set = regset_tls_set
	},
R
Roland McGrath 已提交
1657 1658 1659 1660 1661 1662
	[REGSET_IOPERM32] = {
		.core_note_type = NT_386_IOPERM,
		.n = IO_BITMAP_BYTES / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = ioperm_active, .get = ioperm_get
	},
R
Roland McGrath 已提交
1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683
};

/* Regset view describing a 32-bit (i386) task. */
static const struct user_regset_view user_x86_32_view = {
	.name		= "i386",
	.e_machine	= EM_386,
	.regsets	= x86_32_regsets,
	.n		= ARRAY_SIZE(x86_32_regsets)
};
#endif

const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(task, TIF_IA32))
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
		return &user_x86_32_view;
#endif
#ifdef CONFIG_X86_64
	return &user_x86_64_view;
#endif
}

1684 1685
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
					 int error_code, int si_code)
L
Linus Torvalds 已提交
1686 1687 1688 1689 1690 1691 1692 1693
{
	struct siginfo info;

	tsk->thread.trap_no = 1;
	tsk->thread.error_code = error_code;

	memset(&info, 0, sizeof(info));
	info.si_signo = SIGTRAP;
1694
	info.si_code = si_code;
L
Linus Torvalds 已提交
1695

1696 1697
	/* User-mode ip? */
	info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
L
Linus Torvalds 已提交
1698

S
Simon Arlott 已提交
1699
	/* Send us the fake SIGTRAP */
L
Linus Torvalds 已提交
1700 1701 1702
	force_sig_info(SIGTRAP, &info, tsk);
}

1703

1704 1705 1706
/*
 * IS_IA32: constant 1 on a 32-bit kernel, is_compat_task() when IA32
 * emulation is built in, constant 0 otherwise.
 */
#if defined(CONFIG_X86_32)
# define IS_IA32	1
#elif defined(CONFIG_IA32_EMULATION)
# define IS_IA32	is_compat_task()
#else
# define IS_IA32	0
#endif

/*
 * We must return the syscall number to actually look up in the table.
 * This can be -1L to skip running any syscall at all.
 */
asmregparm long syscall_trace_enter(struct pt_regs *regs)
1717
{
1718 1719
	long ret = 0;

R
Roland McGrath 已提交
1720 1721 1722 1723 1724 1725 1726 1727 1728 1729
	/*
	 * If we stepped into a sysenter/syscall insn, it trapped in
	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
	 * If user-mode had set TF itself, then it's still clear from
	 * do_debug() and we need to set it again to restore the user
	 * state.  If we entered on the slow path, TF was already set.
	 */
	if (test_thread_flag(TIF_SINGLESTEP))
		regs->flags |= X86_EFLAGS_TF;

1730 1731 1732
	/* do the secure computing check first */
	secure_computing(regs->orig_ax);

1733 1734 1735
	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
		ret = -1L;

R
Roland McGrath 已提交
1736 1737 1738
	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
	    tracehook_report_syscall_entry(regs))
		ret = -1L;
1739

1740
	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1741
		trace_sys_enter(regs, regs->orig_ax);
1742

1743
	if (unlikely(current->audit_context)) {
1744
		if (IS_IA32)
1745 1746 1747 1748
			audit_syscall_entry(AUDIT_ARCH_I386,
					    regs->orig_ax,
					    regs->bx, regs->cx,
					    regs->dx, regs->si);
1749 1750
#ifdef CONFIG_X86_64
		else
1751 1752 1753 1754
			audit_syscall_entry(AUDIT_ARCH_X86_64,
					    regs->orig_ax,
					    regs->di, regs->si,
					    regs->dx, regs->r10);
1755
#endif
1756
	}
1757 1758

	return ret ?: regs->orig_ax;
1759 1760
}

1761
asmregparm void syscall_trace_leave(struct pt_regs *regs)
1762 1763 1764 1765
{
	if (unlikely(current->audit_context))
		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);

1766
	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
1767
		trace_sys_exit(regs, regs->ax);
1768

1769
	if (test_thread_flag(TIF_SYSCALL_TRACE))
R
Roland McGrath 已提交
1770
		tracehook_report_syscall_exit(regs, 0);
1771

1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785
	/*
	 * If TIF_SYSCALL_EMU is set, we only get here because of
	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
	 * We already reported this syscall instruction in
	 * syscall_trace_enter(), so don't do any more now.
	 */
	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
		return;

	/*
	 * If we are single-stepping, synthesize a trap to follow the
	 * system call instruction.
	 */
	if (test_thread_flag(TIF_SINGLESTEP) &&
1786
	    tracehook_consider_fatal_signal(current, SIGTRAP))
1787
		send_sigtrap(current, regs, 0, TRAP_BRKPT);
1788
}