vm86_32.c 23.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2 3 4 5
/*
 *  Copyright (C) 1994  Linus Torvalds
 *
 *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
6
 *                stack - Manfred Spraul <manfred@colorfullife.com>
L
Linus Torvalds 已提交
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
 *
 *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
 *                them correctly. Now the emulation will be in a
 *                consistent state after stackfaults - Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
 *                caused by Kasper Dupont's changes - Stas Sergeev
 *
 *   4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed stack access macros to jump to a label
 *                instead of returning to userspace. This simplifies
 *                do_int, and is needed by handle_vm6_fault. Kasper
 *                Dupont <kasperd@daimi.au.dk>
 *
 */

32 33
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

34
#include <linux/capability.h>
L
Linus Torvalds 已提交
35 36
#include <linux/errno.h>
#include <linux/interrupt.h>
37
#include <linux/syscalls.h>
L
Linus Torvalds 已提交
38
#include <linux/sched.h>
39
#include <linux/sched/task_stack.h>
L
Linus Torvalds 已提交
40 41 42 43 44 45 46
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
47
#include <linux/audit.h>
48
#include <linux/stddef.h>
49
#include <linux/slab.h>
50
#include <linux/security.h>
L
Linus Torvalds 已提交
51

52
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
53 54 55
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
56
#include <asm/traps.h>
B
Brian Gerst 已提交
57
#include <asm/vm86.h>
58
#include <asm/switch_to.h>
L
Linus Torvalds 已提交
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78

/*
 * Known problems:
 *
 * Interrupt handling is not guaranteed:
 * - a real x86 will disable all interrupts for one instruction
 *   after a "mov ss,xx" to make stack handling atomic even without
 *   the 'lss' instruction. We can't guarantee this in v86 mode,
 *   as the next instruction might result in a page fault or similar.
 * - a real x86 will have interrupts disabled for one instruction
 *   past the 'sti' that enables them. We don't bother with all the
 *   details yet.
 *
 * Let's hope these problems do not actually matter for anything.
 */


/*
 * 8- and 16-bit register defines..
 */
79 80 81 82
#define AL(regs)	(((unsigned char *)&((regs)->pt.ax))[0])
#define AH(regs)	(((unsigned char *)&((regs)->pt.ax))[1])
#define IP(regs)	(*(unsigned short *)&((regs)->pt.ip))
#define SP(regs)	(*(unsigned short *)&((regs)->pt.sp))
L
Linus Torvalds 已提交
83 84 85 86

/*
 * virtual flags (16 and 32-bit versions)
 */
87 88
#define VFLAGS	(*(unsigned short *)&(current->thread.vm86->veflags))
#define VEFLAGS	(current->thread.vm86->veflags)
L
Linus Torvalds 已提交
89

90
#define set_flags(X, new, mask) \
L
Linus Torvalds 已提交
91 92 93 94 95
((X) = ((X) & ~(mask)) | ((new) & (mask)))

#define SAFE_MASK	(0xDD5)
#define RETURN_MASK	(0xDFF)

96
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
L
Linus Torvalds 已提交
97
{
98 99
	struct task_struct *tsk = current;
	struct vm86plus_struct __user *user;
100
	struct vm86 *vm86 = current->thread.vm86;
101
	long err = 0;
L
Linus Torvalds 已提交
102 103 104 105 106 107 108 109

	/*
	 * This gets called from entry.S with interrupts disabled, but
	 * from process context. Enable interrupts here, before trying
	 * to access user space.
	 */
	local_irq_enable();

110 111
	if (!vm86 || !vm86->user_vm86) {
		pr_alert("no user_vm86: BAD\n");
L
Linus Torvalds 已提交
112 113
		do_exit(SIGSEGV);
	}
114
	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
115
	user = vm86->user_vm86;
116

117
	if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ?
118 119
		       sizeof(struct vm86plus_struct) :
		       sizeof(struct vm86_struct))) {
120
		pr_alert("could not access userspace vm86 info\n");
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
		do_exit(SIGSEGV);
	}

	put_user_try {
		put_user_ex(regs->pt.bx, &user->regs.ebx);
		put_user_ex(regs->pt.cx, &user->regs.ecx);
		put_user_ex(regs->pt.dx, &user->regs.edx);
		put_user_ex(regs->pt.si, &user->regs.esi);
		put_user_ex(regs->pt.di, &user->regs.edi);
		put_user_ex(regs->pt.bp, &user->regs.ebp);
		put_user_ex(regs->pt.ax, &user->regs.eax);
		put_user_ex(regs->pt.ip, &user->regs.eip);
		put_user_ex(regs->pt.cs, &user->regs.cs);
		put_user_ex(regs->pt.flags, &user->regs.eflags);
		put_user_ex(regs->pt.sp, &user->regs.esp);
		put_user_ex(regs->pt.ss, &user->regs.ss);
		put_user_ex(regs->es, &user->regs.es);
		put_user_ex(regs->ds, &user->regs.ds);
		put_user_ex(regs->fs, &user->regs.fs);
		put_user_ex(regs->gs, &user->regs.gs);

142
		put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
143 144
	} put_user_catch(err);
	if (err) {
145
		pr_alert("could not access userspace vm86 info\n");
L
Linus Torvalds 已提交
146 147 148
		do_exit(SIGSEGV);
	}

149
	preempt_disable();
150
	tsk->thread.sp0 = vm86->saved_sp0;
151
	tsk->thread.sysenter_cs = __KERNEL_CS;
152
	update_sp0(tsk);
153
	refresh_sysenter_cs(&tsk->thread);
154
	vm86->saved_sp0 = 0;
155
	preempt_enable();
L
Linus Torvalds 已提交
156

157
	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
158

159
	lazy_load_gs(vm86->regs32.gs);
160

161
	regs->pt.ax = retval;
L
Linus Torvalds 已提交
162 163
}

164
static void mark_screen_rdonly(struct mm_struct *mm)
L
Linus Torvalds 已提交
165
{
166 167
	struct vm_area_struct *vma;
	spinlock_t *ptl;
L
Linus Torvalds 已提交
168
	pgd_t *pgd;
169
	p4d_t *p4d;
L
Linus Torvalds 已提交
170 171
	pud_t *pud;
	pmd_t *pmd;
172
	pte_t *pte;
L
Linus Torvalds 已提交
173 174
	int i;

175
	down_write(&mm->mmap_sem);
176
	pgd = pgd_offset(mm, 0xA0000);
L
Linus Torvalds 已提交
177 178
	if (pgd_none_or_clear_bad(pgd))
		goto out;
179 180 181 182
	p4d = p4d_offset(pgd, 0xA0000);
	if (p4d_none_or_clear_bad(p4d))
		goto out;
	pud = pud_offset(p4d, 0xA0000);
L
Linus Torvalds 已提交
183 184 185
	if (pud_none_or_clear_bad(pud))
		goto out;
	pmd = pmd_offset(pud, 0xA0000);
186 187

	if (pmd_trans_huge(*pmd)) {
188
		vma = find_vma(mm, 0xA0000);
189 190
		split_huge_pmd(vma, pmd, 0xA0000);
	}
L
Linus Torvalds 已提交
191 192
	if (pmd_none_or_clear_bad(pmd))
		goto out;
193
	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
L
Linus Torvalds 已提交
194 195 196 197 198
	for (i = 0; i < 32; i++) {
		if (pte_present(*pte))
			set_pte(pte, pte_wrprotect(*pte));
		pte++;
	}
199
	pte_unmap_unlock(pte, ptl);
L
Linus Torvalds 已提交
200
out:
201
	up_write(&mm->mmap_sem);
202
	flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
L
Linus Torvalds 已提交
203 204 205 206 207
}



static int do_vm86_irq_handling(int subfunction, int irqnumber);
208
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
L
Linus Torvalds 已提交
209

210
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
L
Linus Torvalds 已提交
211
{
212
	return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
L
Linus Torvalds 已提交
213 214 215
}


216
SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
L
Linus Torvalds 已提交
217
{
218
	switch (cmd) {
219 220 221 222
	case VM86_REQUEST_IRQ:
	case VM86_FREE_IRQ:
	case VM86_GET_IRQ_BITS:
	case VM86_GET_AND_RESET_IRQ:
223
		return do_vm86_irq_handling(cmd, (int)arg);
224 225 226 227 228 229 230
	case VM86_PLUS_INSTALL_CHECK:
		/*
		 * NOTE: on old vm86 stuff this will return the error
		 *  from access_ok(), because the subfunction is
		 *  interpreted as (invalid) address to vm86_struct.
		 *  So the installation check works.
		 */
231
		return 0;
L
Linus Torvalds 已提交
232 233 234
	}

	/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
235
	return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
L
Linus Torvalds 已提交
236 237 238
}


239
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
L
Linus Torvalds 已提交
240
{
241
	struct task_struct *tsk = current;
242
	struct vm86 *vm86 = tsk->thread.vm86;
243
	struct kernel_vm86_regs vm86regs;
244
	struct pt_regs *regs = current_pt_regs();
245 246
	unsigned long err = 0;

247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
	err = security_mmap_addr(0);
	if (err) {
		/*
		 * vm86 cannot virtualize the address space, so vm86 users
		 * need to manage the low 1MB themselves using mmap.  Given
		 * that BIOS places important data in the first page, vm86
		 * is essentially useless if mmap_min_addr != 0.  DOSEMU,
		 * for example, won't even bother trying to use vm86 if it
		 * can't map a page at virtual address 0.
		 *
		 * To reduce the available kernel attack surface, simply
		 * disallow vm86(old) for users who cannot mmap at va 0.
		 *
		 * The implementation of security_mmap_addr will allow
		 * suitably privileged users to map va 0 even if
		 * vm.mmap_min_addr is set above 0, and we want this
		 * behavior for vm86 as well, as it ensures that legacy
		 * tools like vbetool will not fail just because of
		 * vm.mmap_min_addr.
		 */
		pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d).  Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
			     current->comm, task_pid_nr(current),
			     from_kuid_munged(&init_user_ns, current_uid()));
		return -EPERM;
	}

273 274 275 276 277 278
	if (!vm86) {
		if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
			return -ENOMEM;
		tsk->thread.vm86 = vm86;
	}
	if (vm86->saved_sp0)
279 280
		return -EPERM;

281
	if (!access_ok(VERIFY_READ, user_vm86, plus ?
282 283 284 285
		       sizeof(struct vm86_struct) :
		       sizeof(struct vm86plus_struct)))
		return -EFAULT;

286
	memset(&vm86regs, 0, sizeof(vm86regs));
287 288
	get_user_try {
		unsigned short seg;
289 290 291 292 293 294 295 296 297
		get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
		get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
		get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
		get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
		get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
		get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
		get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
		get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
		get_user_ex(seg, &user_vm86->regs.cs);
298
		vm86regs.pt.cs = seg;
299 300 301
		get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
		get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
		get_user_ex(seg, &user_vm86->regs.ss);
302
		vm86regs.pt.ss = seg;
303 304 305 306 307 308 309 310
		get_user_ex(vm86regs.es, &user_vm86->regs.es);
		get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
		get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
		get_user_ex(vm86regs.gs, &user_vm86->regs.gs);

		get_user_ex(vm86->flags, &user_vm86->flags);
		get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
		get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
311 312 313 314
	} get_user_catch(err);
	if (err)
		return err;

315 316
	if (copy_from_user(&vm86->int_revectored,
			   &user_vm86->int_revectored,
317 318
			   sizeof(struct revectored_struct)))
		return -EFAULT;
319 320
	if (copy_from_user(&vm86->int21_revectored,
			   &user_vm86->int21_revectored,
321 322 323
			   sizeof(struct revectored_struct)))
		return -EFAULT;
	if (plus) {
324
		if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
325 326
				   sizeof(struct vm86plus_info_struct)))
			return -EFAULT;
327 328 329 330
		vm86->vm86plus.is_vm86pus = 1;
	} else
		memset(&vm86->vm86plus, 0,
		       sizeof(struct vm86plus_info_struct));
331 332

	memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
333
	vm86->user_vm86 = user_vm86;
L
Linus Torvalds 已提交
334 335

/*
336
 * The flags register is also special: we cannot trust that the user
L
Linus Torvalds 已提交
337 338 339
 * has set it up safely, so this makes sure interrupt etc flags are
 * inherited from protected mode.
 */
340 341
	VEFLAGS = vm86regs.pt.flags;
	vm86regs.pt.flags &= SAFE_MASK;
342
	vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
343
	vm86regs.pt.flags |= X86_VM_MASK;
L
Linus Torvalds 已提交
344

345
	vm86regs.pt.orig_ax = regs->orig_ax;
B
Brian Gerst 已提交
346

347
	switch (vm86->cpu_type) {
348
	case CPU_286:
349
		vm86->veflags_mask = 0;
350 351
		break;
	case CPU_386:
352
		vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
353 354
		break;
	case CPU_486:
355
		vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
356 357
		break;
	default:
358
		vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
359
		break;
L
Linus Torvalds 已提交
360 361 362
	}

/*
363
 * Save old state
L
Linus Torvalds 已提交
364
 */
365
	vm86->saved_sp0 = tsk->thread.sp0;
366
	lazy_save_gs(vm86->regs32.gs);
L
Linus Torvalds 已提交
367

368
	/* make room for real-mode segments */
369
	preempt_disable();
370
	tsk->thread.sp0 += 16;
371

372
	if (static_cpu_has(X86_FEATURE_SEP)) {
L
Linus Torvalds 已提交
373
		tsk->thread.sysenter_cs = 0;
374 375
		refresh_sysenter_cs(&tsk->thread);
	}
376

377
	update_sp0(tsk);
378
	preempt_enable();
L
Linus Torvalds 已提交
379

380
	if (vm86->flags & VM86_SCREEN_BITMAP)
381
		mark_screen_rdonly(tsk->mm);
382

383 384 385
	memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
	force_iret();
	return regs->ax;
L
Linus Torvalds 已提交
386 387
}

388
static inline void set_IF(struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
389
{
390
	VEFLAGS |= X86_EFLAGS_VIF;
L
Linus Torvalds 已提交
391 392
}

393
static inline void clear_IF(struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
394
{
395
	VEFLAGS &= ~X86_EFLAGS_VIF;
L
Linus Torvalds 已提交
396 397
}

398
static inline void clear_TF(struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
399
{
400
	regs->pt.flags &= ~X86_EFLAGS_TF;
L
Linus Torvalds 已提交
401 402
}

403
static inline void clear_AC(struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
404
{
405
	regs->pt.flags &= ~X86_EFLAGS_AC;
L
Linus Torvalds 已提交
406 407
}

408 409
/*
 * It is correct to call set_IF(regs) from the set_vflags_*
L
Linus Torvalds 已提交
410 411 412
 * functions. However someone forgot to call clear_IF(regs)
 * in the opposite case.
 * After the command sequence CLI PUSHF STI POPF you should
J
Joe Perches 已提交
413
 * end up with interrupts disabled, but you ended up with
L
Linus Torvalds 已提交
414 415 416 417 418 419
 * interrupts enabled.
 *  ( I was testing my own changes, but the only bug I
 *    could find was in a function I had not changed. )
 * [KD]
 */

420
static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
421
{
422
	set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
423
	set_flags(regs->pt.flags, flags, SAFE_MASK);
424
	if (flags & X86_EFLAGS_IF)
L
Linus Torvalds 已提交
425 426 427 428 429
		set_IF(regs);
	else
		clear_IF(regs);
}

430
static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
431
{
432
	set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
433
	set_flags(regs->pt.flags, flags, SAFE_MASK);
434
	if (flags & X86_EFLAGS_IF)
L
Linus Torvalds 已提交
435 436 437 438 439
		set_IF(regs);
	else
		clear_IF(regs);
}

440
static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
L
Linus Torvalds 已提交
441
{
442
	unsigned long flags = regs->pt.flags & RETURN_MASK;
L
Linus Torvalds 已提交
443

444 445 446
	if (VEFLAGS & X86_EFLAGS_VIF)
		flags |= X86_EFLAGS_IF;
	flags |= X86_EFLAGS_IOPL;
447
	return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
L
Linus Torvalds 已提交
448 449
}

450
static inline int is_revectored(int nr, struct revectored_struct *bitmap)
L
Linus Torvalds 已提交
451
{
452
	return test_bit(nr, bitmap->__map);
L
Linus Torvalds 已提交
453 454 455 456 457 458 459 460 461 462
}

#define val_byte(val, n) (((__u8 *)&val)[n])

#define pushb(base, ptr, val, err_label) \
	do { \
		__u8 __val = val; \
		ptr--; \
		if (put_user(__val, base + ptr) < 0) \
			goto err_label; \
463
	} while (0)
L
Linus Torvalds 已提交
464 465 466 467 468 469 470 471 472 473

#define pushw(base, ptr, val, err_label) \
	do { \
		__u16 __val = val; \
		ptr--; \
		if (put_user(val_byte(__val, 1), base + ptr) < 0) \
			goto err_label; \
		ptr--; \
		if (put_user(val_byte(__val, 0), base + ptr) < 0) \
			goto err_label; \
474
	} while (0)
L
Linus Torvalds 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490

#define pushl(base, ptr, val, err_label) \
	do { \
		__u32 __val = val; \
		ptr--; \
		if (put_user(val_byte(__val, 3), base + ptr) < 0) \
			goto err_label; \
		ptr--; \
		if (put_user(val_byte(__val, 2), base + ptr) < 0) \
			goto err_label; \
		ptr--; \
		if (put_user(val_byte(__val, 1), base + ptr) < 0) \
			goto err_label; \
		ptr--; \
		if (put_user(val_byte(__val, 0), base + ptr) < 0) \
			goto err_label; \
491
	} while (0)
L
Linus Torvalds 已提交
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537

#define popb(base, ptr, err_label) \
	({ \
		__u8 __res; \
		if (get_user(__res, base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		__res; \
	})

#define popw(base, ptr, err_label) \
	({ \
		__u16 __res; \
		if (get_user(val_byte(__res, 0), base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		if (get_user(val_byte(__res, 1), base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		__res; \
	})

#define popl(base, ptr, err_label) \
	({ \
		__u32 __res; \
		if (get_user(val_byte(__res, 0), base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		if (get_user(val_byte(__res, 1), base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		if (get_user(val_byte(__res, 2), base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		if (get_user(val_byte(__res, 3), base + ptr) < 0) \
			goto err_label; \
		ptr++; \
		__res; \
	})

/* There are so many possible reasons for this function to return
 * VM86_INTx, so adding another doesn't bother me. We can expect
 * userspace programs to be able to handle it. (Getting a problem
 * in userspace is always better than an Oops anyway.) [KD]
 */
static void do_int(struct kernel_vm86_regs *regs, int i,
538
    unsigned char __user *ssp, unsigned short sp)
L
Linus Torvalds 已提交
539 540 541
{
	unsigned long __user *intr_ptr;
	unsigned long segoffs;
542
	struct vm86 *vm86 = current->thread.vm86;
L
Linus Torvalds 已提交
543

544
	if (regs->pt.cs == BIOSSEG)
L
Linus Torvalds 已提交
545
		goto cannot_handle;
546
	if (is_revectored(i, &vm86->int_revectored))
L
Linus Torvalds 已提交
547
		goto cannot_handle;
548
	if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
L
Linus Torvalds 已提交
549 550 551 552 553 554 555
		goto cannot_handle;
	intr_ptr = (unsigned long __user *) (i << 2);
	if (get_user(segoffs, intr_ptr))
		goto cannot_handle;
	if ((segoffs >> 16) == BIOSSEG)
		goto cannot_handle;
	pushw(ssp, sp, get_vflags(regs), cannot_handle);
556
	pushw(ssp, sp, regs->pt.cs, cannot_handle);
L
Linus Torvalds 已提交
557
	pushw(ssp, sp, IP(regs), cannot_handle);
558
	regs->pt.cs = segoffs >> 16;
L
Linus Torvalds 已提交
559 560 561 562 563 564 565 566
	SP(regs) -= 6;
	IP(regs) = segoffs & 0xffff;
	clear_TF(regs);
	clear_IF(regs);
	clear_AC(regs);
	return;

cannot_handle:
567
	save_v86_state(regs, VM86_INTx + (i << 8));
L
Linus Torvalds 已提交
568 569
}

570
int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
L
Linus Torvalds 已提交
571
{
572 573 574
	struct vm86 *vm86 = current->thread.vm86;

	if (vm86->vm86plus.is_vm86pus) {
575
		if ((trapno == 3) || (trapno == 1)) {
576
			save_v86_state(regs, VM86_TRAP + (trapno << 8));
577 578
			return 0;
		}
579
		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
L
Linus Torvalds 已提交
580 581
		return 0;
	}
582
	if (trapno != 1)
L
Linus Torvalds 已提交
583
		return 1; /* we let this handle by the calling routine */
584
	current->thread.trap_nr = trapno;
L
Linus Torvalds 已提交
585
	current->thread.error_code = error_code;
R
Roland McGrath 已提交
586
	force_sig(SIGTRAP, current);
L
Linus Torvalds 已提交
587 588 589
	return 0;
}

590
void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
L
Linus Torvalds 已提交
591 592 593 594
{
	unsigned char opcode;
	unsigned char __user *csp;
	unsigned char __user *ssp;
595
	unsigned short ip, sp, orig_flags;
L
Linus Torvalds 已提交
596
	int data32, pref_done;
597
	struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;
L
Linus Torvalds 已提交
598 599

#define CHECK_IF_IN_TRAP \
600
	if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
601
		newflags |= X86_EFLAGS_TF
L
Linus Torvalds 已提交
602

603
	orig_flags = *(unsigned short *)&regs->pt.flags;
604

605 606
	csp = (unsigned char __user *) (regs->pt.cs << 4);
	ssp = (unsigned char __user *) (regs->pt.ss << 4);
L
Linus Torvalds 已提交
607 608 609 610 611 612 613
	sp = SP(regs);
	ip = IP(regs);

	data32 = 0;
	pref_done = 0;
	do {
		switch (opcode = popb(csp, ip, simulate_sigsegv)) {
614 615 616 617 618 619 620 621 622 623 624
		case 0x66:      /* 32-bit data */     data32 = 1; break;
		case 0x67:      /* 32-bit address */  break;
		case 0x2e:      /* CS */              break;
		case 0x3e:      /* DS */              break;
		case 0x26:      /* ES */              break;
		case 0x36:      /* SS */              break;
		case 0x65:      /* GS */              break;
		case 0x64:      /* FS */              break;
		case 0xf2:      /* repnz */       break;
		case 0xf3:      /* rep */             break;
		default: pref_done = 1;
L
Linus Torvalds 已提交
625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
		}
	} while (!pref_done);

	switch (opcode) {

	/* pushf */
	case 0x9c:
		if (data32) {
			pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
			SP(regs) -= 4;
		} else {
			pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
			SP(regs) -= 2;
		}
		IP(regs) = ip;
640
		goto vm86_fault_return;
L
Linus Torvalds 已提交
641 642 643 644 645 646

	/* popf */
	case 0x9d:
		{
		unsigned long newflags;
		if (data32) {
647
			newflags = popl(ssp, sp, simulate_sigsegv);
L
Linus Torvalds 已提交
648 649 650 651 652 653 654
			SP(regs) += 4;
		} else {
			newflags = popw(ssp, sp, simulate_sigsegv);
			SP(regs) += 2;
		}
		IP(regs) = ip;
		CHECK_IF_IN_TRAP;
655
		if (data32)
L
Linus Torvalds 已提交
656
			set_vflags_long(newflags, regs);
657
		else
L
Linus Torvalds 已提交
658
			set_vflags_short(newflags, regs);
659

660
		goto check_vip;
L
Linus Torvalds 已提交
661 662 663 664
		}

	/* int xx */
	case 0xcd: {
665
		int intno = popb(csp, ip, simulate_sigsegv);
L
Linus Torvalds 已提交
666
		IP(regs) = ip;
667
		if (vmpi->vm86dbg_active) {
668 669 670 671
			if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
				save_v86_state(regs, VM86_INTx + (intno << 8));
				return;
			}
L
Linus Torvalds 已提交
672 673 674 675 676 677 678 679 680 681 682 683
		}
		do_int(regs, intno, ssp, sp);
		return;
	}

	/* iret */
	case 0xcf:
		{
		unsigned long newip;
		unsigned long newcs;
		unsigned long newflags;
		if (data32) {
684 685 686
			newip = popl(ssp, sp, simulate_sigsegv);
			newcs = popl(ssp, sp, simulate_sigsegv);
			newflags = popl(ssp, sp, simulate_sigsegv);
L
Linus Torvalds 已提交
687 688 689 690 691 692 693 694
			SP(regs) += 12;
		} else {
			newip = popw(ssp, sp, simulate_sigsegv);
			newcs = popw(ssp, sp, simulate_sigsegv);
			newflags = popw(ssp, sp, simulate_sigsegv);
			SP(regs) += 6;
		}
		IP(regs) = newip;
695
		regs->pt.cs = newcs;
L
Linus Torvalds 已提交
696 697 698 699 700 701
		CHECK_IF_IN_TRAP;
		if (data32) {
			set_vflags_long(newflags, regs);
		} else {
			set_vflags_short(newflags, regs);
		}
702
		goto check_vip;
L
Linus Torvalds 已提交
703 704 705 706 707 708
		}

	/* cli */
	case 0xfa:
		IP(regs) = ip;
		clear_IF(regs);
709
		goto vm86_fault_return;
L
Linus Torvalds 已提交
710 711 712 713 714 715 716 717 718 719 720

	/* sti */
	/*
	 * Damn. This is incorrect: the 'sti' instruction should actually
	 * enable interrupts after the /next/ instruction. Not good.
	 *
	 * Probably needs some horsing around with the TF flag. Aiee..
	 */
	case 0xfb:
		IP(regs) = ip;
		set_IF(regs);
721
		goto check_vip;
L
Linus Torvalds 已提交
722 723

	default:
724
		save_v86_state(regs, VM86_UNKNOWN);
L
Linus Torvalds 已提交
725 726 727 728
	}

	return;

729
check_vip:
730 731
	if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
	    (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
732 733 734 735 736 737 738 739 740 741 742 743 744
		save_v86_state(regs, VM86_STI);
		return;
	}

vm86_fault_return:
	if (vmpi->force_return_for_pic  && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
		save_v86_state(regs, VM86_PICRETURN);
		return;
	}
	if (orig_flags & X86_EFLAGS_TF)
		handle_vm86_trap(regs, 0, X86_TRAP_DB);
	return;

L
Linus Torvalds 已提交
745 746 747 748 749 750 751 752 753 754 755
simulate_sigsegv:
	/* FIXME: After a long discussion with Stas we finally
	 *        agreed, that this is wrong. Here we should
	 *        really send a SIGSEGV to the user program.
	 *        But how do we create the correct context? We
	 *        are inside a general protection fault handler
	 *        and has just returned from a page fault handler.
	 *        The correct context for the signal handler
	 *        should be a mixture of the two, but how do we
	 *        get the information? [KD]
	 */
756
	save_v86_state(regs, VM86_UNKNOWN);
L
Linus Torvalds 已提交
757 758 759 760 761 762 763 764 765 766 767 768 769 770
}

/* ---------------- vm86 special IRQ passing stuff ----------------- */

#define VM86_IRQNAME		"vm86irq"

static struct vm86_irqs {
	struct task_struct *tsk;
	int sig;
} vm86_irqs[16];

static DEFINE_SPINLOCK(irqbits_lock);
static int irqbits;

771
#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \
L
Linus Torvalds 已提交
772
	| (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO)  | (1 << SIGURG) \
773 774
	| (1 << SIGUNUSED))

775
static irqreturn_t irq_handler(int intno, void *dev_id)
L
Linus Torvalds 已提交
776 777 778 779
{
	int irq_bit;
	unsigned long flags;

780
	spin_lock_irqsave(&irqbits_lock, flags);
L
Linus Torvalds 已提交
781
	irq_bit = 1 << intno;
782
	if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk)
L
Linus Torvalds 已提交
783 784 785 786 787 788 789 790
		goto out;
	irqbits |= irq_bit;
	if (vm86_irqs[intno].sig)
		send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
	/*
	 * IRQ will be re-enabled when user asks for the irq (whether
	 * polling or as a result of the signal)
	 */
791 792
	disable_irq_nosync(intno);
	spin_unlock_irqrestore(&irqbits_lock, flags);
L
Linus Torvalds 已提交
793 794 795
	return IRQ_HANDLED;

out:
796
	spin_unlock_irqrestore(&irqbits_lock, flags);
L
Linus Torvalds 已提交
797 798 799 800 801 802 803 804 805 806
	return IRQ_NONE;
}

static inline void free_vm86_irq(int irqnumber)
{
	unsigned long flags;

	free_irq(irqnumber, NULL);
	vm86_irqs[irqnumber].tsk = NULL;

807
	spin_lock_irqsave(&irqbits_lock, flags);
L
Linus Torvalds 已提交
808
	irqbits &= ~(1 << irqnumber);
809
	spin_unlock_irqrestore(&irqbits_lock, flags);
L
Linus Torvalds 已提交
810 811 812 813 814 815 816 817 818 819 820 821 822 823
}

void release_vm86_irqs(struct task_struct *task)
{
	int i;
	for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
	    if (vm86_irqs[i].tsk == task)
		free_vm86_irq(i);
}

static inline int get_and_reset_irq(int irqnumber)
{
	int bit;
	unsigned long flags;
824
	int ret = 0;
825

L
Linus Torvalds 已提交
826 827
	if (invalid_vm86_irq(irqnumber)) return 0;
	if (vm86_irqs[irqnumber].tsk != current) return 0;
828
	spin_lock_irqsave(&irqbits_lock, flags);
L
Linus Torvalds 已提交
829 830
	bit = irqbits & (1 << irqnumber);
	irqbits &= ~bit;
831 832 833 834 835
	if (bit) {
		enable_irq(irqnumber);
		ret = 1;
	}

836
	spin_unlock_irqrestore(&irqbits_lock, flags);
837
	return ret;
L
Linus Torvalds 已提交
838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
}


static int do_vm86_irq_handling(int subfunction, int irqnumber)
{
	int ret;
	switch (subfunction) {
		case VM86_GET_AND_RESET_IRQ: {
			return get_and_reset_irq(irqnumber);
		}
		case VM86_GET_IRQ_BITS: {
			return irqbits;
		}
		case VM86_REQUEST_IRQ: {
			int sig = irqnumber >> 8;
			int irq = irqnumber & 255;
			if (!capable(CAP_SYS_ADMIN)) return -EPERM;
			if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
			if (invalid_vm86_irq(irq)) return -EPERM;
			if (vm86_irqs[irq].tsk) return -EPERM;
			ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
			if (ret) return ret;
			vm86_irqs[irq].sig = sig;
			vm86_irqs[irq].tsk = current;
			return irq;
		}
		case  VM86_FREE_IRQ: {
			if (invalid_vm86_irq(irqnumber)) return -EPERM;
			if (!vm86_irqs[irqnumber].tsk) return 0;
			if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
			free_vm86_irq(irqnumber);
			return 0;
		}
	}
	return -EINVAL;
}