/* align.c - handle alignment exceptions for the Power PC.
 *
 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
 * Copyright (c) 1998-1999 TiVo, Inc.
 *   PowerPC 403GCX modifications.
 * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
 *   PowerPC 403GCX/405GP modifications.
 * Copyright (c) 2001-2002 PPC64 team, IBM Corp
 *   64-bit and Power4 support
 * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
 *                    <benh@kernel.crashing.org>
 *   Merge ppc32 and ppc64 implementations
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/emulated_ops.h>

/*
 * Decode information for one load/store instruction slot in the
 * lookup tables below.
 */
struct aligninfo {
	unsigned char len;	/* operand length; INVALID slots use 0 */
	unsigned char flags;	/* OR of the LD/ST/SE/F/U/... flag bits */
};

/* True when bits 26 and up of the instruction word (the primary opcode) equal 31 */
#define IS_XFORM(inst)	(((inst) >> 26) == 31)
/* True when bits 26 and up of the instruction word (the primary opcode) are >= 56 */
#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)

/* Table initializer for a slot with no supported instruction: len 0, no flags */
#define INVALID	{ 0, 0 }

/* Bits in the flags field */
#define LD	0	/* load */
#define ST	1	/* store */
#define SE	2	/* sign-extend value, or FP ld/st as word */
#define F	4	/* to/from fp regs */
#define U	8	/* update index register */
#define M	0x10	/* multiple load/store */
#define SW	0x20	/* byte swap */
#define S	0x40	/* single-precision fp or... */
#define SX	0x40	/* ... byte count in XER */
#define HARD	0x80	/* string, stwcx. */
#define E4	0x40	/* SPE endianness is word */
#define E8	0x80	/* SPE endianness is double word */
#define SPLT	0x80	/* VSX SPLAT load */

/* DSISR bits reported for a DCBZ instruction: */
#define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */

/*
 * Exchange two lvalues.  Relies on a scratch variable named `t' being
 * declared in the scope where the macro is expanded; each argument is
 * evaluated twice, so do not pass expressions with side effects.
 */
#define SWAP(a, b)	(t = (a), (a) = (b), (b) = t)

L
Linus Torvalds 已提交
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
/*
 * The PowerPC stores certain bits of the instruction that caused the
 * alignment exception in the DSISR register.  This array maps those
 * bits to information about the operand length and what the
 * instruction would do.
 */
static struct aligninfo aligninfo[128] = {
	{ 4, LD },		/* 00 0 0000: lwz / lwarx */
	INVALID,		/* 00 0 0001 */
	{ 4, ST },		/* 00 0 0010: stw */
	INVALID,		/* 00 0 0011 */
	{ 2, LD },		/* 00 0 0100: lhz */
	{ 2, LD+SE },		/* 00 0 0101: lha */
	{ 2, ST },		/* 00 0 0110: sth */
	{ 4, LD+M },		/* 00 0 0111: lmw */
74
	{ 4, LD+F+S },		/* 00 0 1000: lfs */
L
Linus Torvalds 已提交
75
	{ 8, LD+F },		/* 00 0 1001: lfd */
76
	{ 4, ST+F+S },		/* 00 0 1010: stfs */
L
Linus Torvalds 已提交
77 78
	{ 8, ST+F },		/* 00 0 1011: stfd */
	INVALID,		/* 00 0 1100 */
79
	{ 8, LD },		/* 00 0 1101: ld/ldu/lwa */
L
Linus Torvalds 已提交
80
	INVALID,		/* 00 0 1110 */
81
	{ 8, ST },		/* 00 0 1111: std/stdu */
L
Linus Torvalds 已提交
82 83 84 85 86 87 88 89
	{ 4, LD+U },		/* 00 1 0000: lwzu */
	INVALID,		/* 00 1 0001 */
	{ 4, ST+U },		/* 00 1 0010: stwu */
	INVALID,		/* 00 1 0011 */
	{ 2, LD+U },		/* 00 1 0100: lhzu */
	{ 2, LD+SE+U },		/* 00 1 0101: lhau */
	{ 2, ST+U },		/* 00 1 0110: sthu */
	{ 4, ST+M },		/* 00 1 0111: stmw */
90
	{ 4, LD+F+S+U },	/* 00 1 1000: lfsu */
L
Linus Torvalds 已提交
91
	{ 8, LD+F+U },		/* 00 1 1001: lfdu */
92
	{ 4, ST+F+S+U },	/* 00 1 1010: stfsu */
L
Linus Torvalds 已提交
93
	{ 8, ST+F+U },		/* 00 1 1011: stfdu */
94
	{ 16, LD+F },		/* 00 1 1100: lfdp */
L
Linus Torvalds 已提交
95
	INVALID,		/* 00 1 1101 */
96
	{ 16, ST+F },		/* 00 1 1110: stfdp */
L
Linus Torvalds 已提交
97 98 99 100 101 102 103 104 105
	INVALID,		/* 00 1 1111 */
	{ 8, LD },		/* 01 0 0000: ldx */
	INVALID,		/* 01 0 0001 */
	{ 8, ST },		/* 01 0 0010: stdx */
	INVALID,		/* 01 0 0011 */
	INVALID,		/* 01 0 0100 */
	{ 4, LD+SE },		/* 01 0 0101: lwax */
	INVALID,		/* 01 0 0110 */
	INVALID,		/* 01 0 0111 */
106 107 108 109
	{ 4, LD+M+HARD+SX },	/* 01 0 1000: lswx */
	{ 4, LD+M+HARD },	/* 01 0 1001: lswi */
	{ 4, ST+M+HARD+SX },	/* 01 0 1010: stswx */
	{ 4, ST+M+HARD },	/* 01 0 1011: stswi */
L
Linus Torvalds 已提交
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
	INVALID,		/* 01 0 1100 */
	{ 8, LD+U },		/* 01 0 1101: ldu */
	INVALID,		/* 01 0 1110 */
	{ 8, ST+U },		/* 01 0 1111: stdu */
	{ 8, LD+U },		/* 01 1 0000: ldux */
	INVALID,		/* 01 1 0001 */
	{ 8, ST+U },		/* 01 1 0010: stdux */
	INVALID,		/* 01 1 0011 */
	INVALID,		/* 01 1 0100 */
	{ 4, LD+SE+U },		/* 01 1 0101: lwaux */
	INVALID,		/* 01 1 0110 */
	INVALID,		/* 01 1 0111 */
	INVALID,		/* 01 1 1000 */
	INVALID,		/* 01 1 1001 */
	INVALID,		/* 01 1 1010 */
	INVALID,		/* 01 1 1011 */
	INVALID,		/* 01 1 1100 */
	INVALID,		/* 01 1 1101 */
	INVALID,		/* 01 1 1110 */
	INVALID,		/* 01 1 1111 */
	INVALID,		/* 10 0 0000 */
	INVALID,		/* 10 0 0001 */
132
	INVALID,		/* 10 0 0010: stwcx. */
L
Linus Torvalds 已提交
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
	INVALID,		/* 10 0 0011 */
	INVALID,		/* 10 0 0100 */
	INVALID,		/* 10 0 0101 */
	INVALID,		/* 10 0 0110 */
	INVALID,		/* 10 0 0111 */
	{ 4, LD+SW },		/* 10 0 1000: lwbrx */
	INVALID,		/* 10 0 1001 */
	{ 4, ST+SW },		/* 10 0 1010: stwbrx */
	INVALID,		/* 10 0 1011 */
	{ 2, LD+SW },		/* 10 0 1100: lhbrx */
	{ 4, LD+SE },		/* 10 0 1101  lwa */
	{ 2, ST+SW },		/* 10 0 1110: sthbrx */
	INVALID,		/* 10 0 1111 */
	INVALID,		/* 10 1 0000 */
	INVALID,		/* 10 1 0001 */
	INVALID,		/* 10 1 0010 */
	INVALID,		/* 10 1 0011 */
	INVALID,		/* 10 1 0100 */
	INVALID,		/* 10 1 0101 */
	INVALID,		/* 10 1 0110 */
	INVALID,		/* 10 1 0111 */
	INVALID,		/* 10 1 1000 */
	INVALID,		/* 10 1 1001 */
	INVALID,		/* 10 1 1010 */
	INVALID,		/* 10 1 1011 */
	INVALID,		/* 10 1 1100 */
	INVALID,		/* 10 1 1101 */
	INVALID,		/* 10 1 1110 */
161
	{ 0, ST+HARD },		/* 10 1 1111: dcbz */
L
Linus Torvalds 已提交
162 163 164 165 166 167 168 169
	{ 4, LD },		/* 11 0 0000: lwzx */
	INVALID,		/* 11 0 0001 */
	{ 4, ST },		/* 11 0 0010: stwx */
	INVALID,		/* 11 0 0011 */
	{ 2, LD },		/* 11 0 0100: lhzx */
	{ 2, LD+SE },		/* 11 0 0101: lhax */
	{ 2, ST },		/* 11 0 0110: sthx */
	INVALID,		/* 11 0 0111 */
170
	{ 4, LD+F+S },		/* 11 0 1000: lfsx */
L
Linus Torvalds 已提交
171
	{ 8, LD+F },		/* 11 0 1001: lfdx */
172
	{ 4, ST+F+S },		/* 11 0 1010: stfsx */
L
Linus Torvalds 已提交
173
	{ 8, ST+F },		/* 11 0 1011: stfdx */
174 175 176 177
	{ 16, LD+F },		/* 11 0 1100: lfdpx */
	{ 4, LD+F+SE },		/* 11 0 1101: lfiwax */
	{ 16, ST+F },		/* 11 0 1110: stfdpx */
	{ 4, ST+F },		/* 11 0 1111: stfiwx */
L
Linus Torvalds 已提交
178 179 180 181 182 183 184 185
	{ 4, LD+U },		/* 11 1 0000: lwzux */
	INVALID,		/* 11 1 0001 */
	{ 4, ST+U },		/* 11 1 0010: stwux */
	INVALID,		/* 11 1 0011 */
	{ 2, LD+U },		/* 11 1 0100: lhzux */
	{ 2, LD+SE+U },		/* 11 1 0101: lhaux */
	{ 2, ST+U },		/* 11 1 0110: sthux */
	INVALID,		/* 11 1 0111 */
186
	{ 4, LD+F+S+U },	/* 11 1 1000: lfsux */
L
Linus Torvalds 已提交
187
	{ 8, LD+F+U },		/* 11 1 1001: lfdux */
188
	{ 4, ST+F+S+U },	/* 11 1 1010: stfsux */
L
Linus Torvalds 已提交
189 190
	{ 8, ST+F+U },		/* 11 1 1011: stfdux */
	INVALID,		/* 11 1 1100 */
191
	{ 4, LD+F },		/* 11 1 1101: lfiwzx */
L
Linus Torvalds 已提交
192 193 194 195
	INVALID,		/* 11 1 1110 */
	INVALID,		/* 11 1 1111 */
};

/*
 * Create a DSISR value from the instruction
 *
 * Used on processors that do not report a usable DSISR on alignment
 * exceptions: the relevant instruction fields are moved to the
 * positions the rest of this file expects to find them in the DSISR.
 * Bit numbers in the comments below use IBM numbering (bit 0 is the
 * most significant bit of the 32-bit word).
 *
 * @instr: the 32-bit instruction word that faulted
 *
 * Returns the synthesized DSISR value.
 */
static inline unsigned make_dsisr(unsigned instr)
{
	/* primary opcode: 31 selects X-form, 56 and above DS-form */
	const unsigned opcode = instr >> 26;
	unsigned dsisr;

	/* bits  6:15 --> 22:31 */
	dsisr = (instr & 0x03ff0000) >> 16;

	if (opcode == 31) {
		/* bits 29:30 --> 15:16 */
		dsisr |= (instr & 0x00000006) << 14;
		/* bit     25 -->    17 */
		dsisr |= (instr & 0x00000040) << 8;
		/* bits 21:24 --> 18:21 */
		dsisr |= (instr & 0x00000780) << 3;
	} else {
		/* bit      5 -->    17 */
		dsisr |= (instr & 0x04000000) >> 12;
		/* bits  1: 4 --> 18:21 */
		dsisr |= (instr & 0x78000000) >> 17;
		/* bits 30:31 --> 12:13 */
		if (opcode >= 56)
			dsisr |= (instr & 0x00000003) << 18;
	}

	return dsisr;
}

/*
 * The dcbz (data cache block zero) instruction
 * gives an alignment fault if used on non-cacheable
 * memory.  We handle the fault mainly for the
 * case when we are running with the cache disabled
 * for debugging.
 *
 * @regs: register state at the exception; regs->dar supplies the
 *        faulting address
 * @addr: unused here, the target is derived from regs->dar
 *
 * The block to clear starts at regs->dar rounded down with "& -size"
 * (this assumes the cache line size is a power of two) and is zeroed
 * one long at a time.
 *
 * Returns 1 when the block was zeroed, -EFAULT if the address range is
 * not writable.
 */
static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
{
	long __user *p;
	int i, size;

#ifdef __powerpc64__
	size = ppc64_caches.dline_size;
#else
	size = L1_CACHE_BYTES;
#endif
	p = (long __user *) (regs->dar & -size);
	/* Only user-mode faults get their destination range validated */
	if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
		return -EFAULT;
	for (i = 0; i < size / sizeof(long); ++i) {
		if (__put_user_inatomic(0, p+i))
			return -EFAULT;
	}
	return 1;
}

/*
 * Emulate load & store multiple instructions
 * On 64-bit machines, these instructions only affect/use the
 * bottom 4 bytes of each register, and the loads clear the
 * top 4 bytes of the affected register.
 */
/*
 * REG_BYTE(rp, i): lvalue for byte i of the register image that starts
 * at rp (an unsigned long pointer into the saved GPRs).  On 64-bit,
 * every group of four byte indices maps to the bytes at offsets 4..7
 * of one 8-byte register, so only one word per register is touched.
 * Note that `i' is evaluated twice in the 64-bit variant.
 */
#ifdef CONFIG_PPC64
#define REG_BYTE(rp, i)		(*((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4))
#else
#define REG_BYTE(rp, i)		(*((u8 *)(rp) + (i)))
#endif

/*
 * SWIZ_PTR(p): user pointer for integer address p after XORing it with
 * `swiz', which must be a variable in the scope where this is expanded.
 */
#define SWIZ_PTR(p)		((unsigned char __user *)((p) ^ swiz))

267 268
static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
			    unsigned int reg, unsigned int nb,
269 270
			    unsigned int flags, unsigned int instr,
			    unsigned long swiz)
271 272
{
	unsigned long *rptr;
273 274
	unsigned int nb0, i, bswiz;
	unsigned long p;
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292

	/*
	 * We do not try to emulate 8 bytes multiple as they aren't really
	 * available in our operating environments and we don't try to
	 * emulate multiples operations in kernel land as they should never
	 * be used/generated there at least not on unaligned boundaries
	 */
	if (unlikely((nb > 4) || !user_mode(regs)))
		return 0;

	/* lmw, stmw, lswi/x, stswi/x */
	nb0 = 0;
	if (flags & HARD) {
		if (flags & SX) {
			nb = regs->xer & 127;
			if (nb == 0)
				return 1;
		} else {
293 294
			unsigned long pc = regs->nip ^ (swiz & 4);

295 296
			if (__get_user_inatomic(instr,
						(unsigned int __user *)pc))
297
				return -EFAULT;
298 299
			if (swiz == 0 && (flags & SW))
				instr = cpu_to_le32(instr);
300 301 302
			nb = (instr >> 11) & 0x1f;
			if (nb == 0)
				nb = 32;
L
Linus Torvalds 已提交
303
		}
304 305 306 307 308 309 310
		if (nb + reg * 4 > 128) {
			nb0 = nb + reg * 4 - 128;
			nb = 128 - reg * 4;
		}
	} else {
		/* lwm, stmw */
		nb = (32 - reg) * 4;
L
Linus Torvalds 已提交
311
	}
312 313 314 315 316

	if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
		return -EFAULT;	/* bad address */

	rptr = &regs->gpr[reg];
317 318 319 320
	p = (unsigned long) addr;
	bswiz = (flags & SW)? 3: 0;

	if (!(flags & ST)) {
321 322 323 324 325 326 327 328 329 330
		/*
		 * This zeroes the top 4 bytes of the affected registers
		 * in 64-bit mode, and also zeroes out any remaining
		 * bytes of the last register for lsw*.
		 */
		memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
		if (nb0 > 0)
			memset(&regs->gpr[0], 0,
			       ((nb0 + 3) / 4) * sizeof(unsigned long));

331
		for (i = 0; i < nb; ++i, ++p)
332 333
			if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
						SWIZ_PTR(p)))
334 335 336 337
				return -EFAULT;
		if (nb0 > 0) {
			rptr = &regs->gpr[0];
			addr += nb;
338
			for (i = 0; i < nb0; ++i, ++p)
339 340 341
				if (__get_user_inatomic(REG_BYTE(rptr,
								 i ^ bswiz),
							SWIZ_PTR(p)))
342 343 344 345
					return -EFAULT;
		}

	} else {
346
		for (i = 0; i < nb; ++i, ++p)
347 348
			if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
						SWIZ_PTR(p)))
349 350 351 352
				return -EFAULT;
		if (nb0 > 0) {
			rptr = &regs->gpr[0];
			addr += nb;
353
			for (i = 0; i < nb0; ++i, ++p)
354 355 356
				if (__put_user_inatomic(REG_BYTE(rptr,
								 i ^ bswiz),
							SWIZ_PTR(p)))
357 358 359 360
					return -EFAULT;
		}
	}
	return 1;
L
Linus Torvalds 已提交
361 362
}

363 364 365 366 367
/*
 * Emulate floating-point pair loads and stores.
 * Only POWER6 has these instructions, and it does true little-endian,
 * so we don't need the address swizzling.
 */
368 369
static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
			   unsigned int flags)
370
{
371 372 373
	char *ptr0 = (char *) &current->thread.TS_FPR(reg);
	char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
	int i, ret, sw = 0;
374 375 376 377 378

	if (!(flags & F))
		return 0;
	if (reg & 1)
		return 0;	/* invalid form: FRS/FRT must be even */
379 380 381 382 383 384 385 386 387 388
	if (flags & SW)
		sw = 7;
	ret = 0;
	for (i = 0; i < 8; ++i) {
		if (!(flags & ST)) {
			ret |= __get_user(ptr0[i^sw], addr + i);
			ret |= __get_user(ptr1[i^sw], addr + i + 8);
		} else {
			ret |= __put_user(ptr0[i^sw], addr + i);
			ret |= __put_user(ptr1[i^sw], addr + i + 8);
389 390 391 392 393 394 395
		}
	}
	if (ret)
		return -EFAULT;
	return 1;	/* exception handled and fixed up */
}

#ifdef CONFIG_SPE

/*
 * Operand length and flags for the SPE load/store instructions.
 * emulate_spe() indexes this table with (instr >> 1) & 0x1f; the binary
 * pattern in each entry's comment is that index.  The first sixteen
 * entries are loads, the last sixteen are stores.
 */
static struct aligninfo spe_aligninfo[32] = {
	{ 8, LD+E8 },		/* 0 00 00: evldd[x] */
	{ 8, LD+E4 },		/* 0 00 01: evldw[x] */
	{ 8, LD },		/* 0 00 10: evldh[x] */
	INVALID,		/* 0 00 11 */
	{ 2, LD },		/* 0 01 00: evlhhesplat[x] */
	INVALID,		/* 0 01 01 */
	{ 2, LD },		/* 0 01 10: evlhhousplat[x] */
	{ 2, LD+SE },		/* 0 01 11: evlhhossplat[x] */
	{ 4, LD },		/* 0 10 00: evlwhe[x] */
	INVALID,		/* 0 10 01 */
	{ 4, LD },		/* 0 10 10: evlwhou[x] */
	{ 4, LD+SE },		/* 0 10 11: evlwhos[x] */
	{ 4, LD+E4 },		/* 0 11 00: evlwwsplat[x] */
	INVALID,		/* 0 11 01 */
	{ 4, LD },		/* 0 11 10: evlwhsplat[x] */
	INVALID,		/* 0 11 11 */

	{ 8, ST+E8 },		/* 1 00 00: evstdd[x] */
	{ 8, ST+E4 },		/* 1 00 01: evstdw[x] */
	{ 8, ST },		/* 1 00 10: evstdh[x] */
	INVALID,		/* 1 00 11 */
	INVALID,		/* 1 01 00 */
	INVALID,		/* 1 01 01 */
	INVALID,		/* 1 01 10 */
	INVALID,		/* 1 01 11 */
	{ 4, ST },		/* 1 10 00: evstwhe[x] */
	INVALID,		/* 1 10 01 */
	{ 4, ST },		/* 1 10 10: evstwho[x] */
	INVALID,		/* 1 10 11 */
	{ 4, ST+E4 },		/* 1 11 00: evstwwe[x] */
	INVALID,		/* 1 11 01 */
	{ 4, ST+E4 },		/* 1 11 10: evstwwo[x] */
	INVALID,		/* 1 11 11 */
};

/*
 * spe_aligninfo[] index of each emulated SPE instruction, i.e. the
 * value of (instr >> 1) & 0x1f that emulate_spe() switches on.
 */
#define	EVLDD		0x00
#define	EVLDW		0x01
#define	EVLDH		0x02
#define	EVLHHESPLAT	0x04
#define	EVLHHOUSPLAT	0x06
#define	EVLHHOSSPLAT	0x07
#define	EVLWHE		0x08
#define	EVLWHOU		0x0A
#define	EVLWHOS		0x0B
#define	EVLWWSPLAT	0x0C
#define	EVLWHSPLAT	0x0E
#define	EVSTDD		0x10
#define	EVSTDW		0x11
#define	EVSTDH		0x12
#define	EVSTWHE		0x18
#define	EVSTWHO		0x1A
#define	EVSTWWE		0x1C
#define	EVSTWWO		0x1E

/*
 * Emulate SPE loads and stores.
 * Only Book-E has these instructions, and it does true little-endian,
 * so we don't need the address swizzling.
 */
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
		       unsigned int instr)
{
	int t, ret;
	union {
		u64 ll;
		u32 w[2];
		u16 h[4];
		u8 v[8];
	} data, temp;
	unsigned char __user *p, *addr;
	unsigned long *evr = &current->thread.evr[reg];
	unsigned int nb, flags;

	instr = (instr >> 1) & 0x1f;

	/* DAR has the operand effective address */
	addr = (unsigned char __user *)regs->dar;

	nb = spe_aligninfo[instr].len;
	flags = spe_aligninfo[instr].flags;

	/* Verify the address of the operand */
	if (unlikely(user_mode(regs) &&
		     !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
				addr, nb)))
		return -EFAULT;

	/* userland only */
	if (unlikely(!user_mode(regs)))
		return 0;

	flush_spe_to_thread(current);

	/* If we are loading, get the data from user space, else
	 * get it from register values
	 */
	if (flags & ST) {
		data.ll = 0;
		switch (instr) {
		case EVSTDD:
		case EVSTDW:
		case EVSTDH:
			data.w[0] = *evr;
			data.w[1] = regs->gpr[reg];
			break;
		case EVSTWHE:
			data.h[2] = *evr >> 16;
			data.h[3] = regs->gpr[reg] >> 16;
			break;
		case EVSTWHO:
			data.h[2] = *evr & 0xffff;
			data.h[3] = regs->gpr[reg] & 0xffff;
			break;
		case EVSTWWE:
			data.w[1] = *evr;
			break;
		case EVSTWWO:
			data.w[1] = regs->gpr[reg];
			break;
		default:
			return -EINVAL;
		}
	} else {
		temp.ll = data.ll = 0;
		ret = 0;
		p = addr;

		switch (nb) {
		case 8:
			ret |= __get_user_inatomic(temp.v[0], p++);
			ret |= __get_user_inatomic(temp.v[1], p++);
			ret |= __get_user_inatomic(temp.v[2], p++);
			ret |= __get_user_inatomic(temp.v[3], p++);
		case 4:
			ret |= __get_user_inatomic(temp.v[4], p++);
			ret |= __get_user_inatomic(temp.v[5], p++);
		case 2:
			ret |= __get_user_inatomic(temp.v[6], p++);
			ret |= __get_user_inatomic(temp.v[7], p++);
			if (unlikely(ret))
				return -EFAULT;
		}

		switch (instr) {
		case EVLDD:
		case EVLDW:
		case EVLDH:
			data.ll = temp.ll;
			break;
		case EVLHHESPLAT:
			data.h[0] = temp.h[3];
			data.h[2] = temp.h[3];
			break;
		case EVLHHOUSPLAT:
		case EVLHHOSSPLAT:
			data.h[1] = temp.h[3];
			data.h[3] = temp.h[3];
			break;
		case EVLWHE:
			data.h[0] = temp.h[2];
			data.h[2] = temp.h[3];
			break;
		case EVLWHOU:
		case EVLWHOS:
			data.h[1] = temp.h[2];
			data.h[3] = temp.h[3];
			break;
		case EVLWWSPLAT:
			data.w[0] = temp.w[1];
			data.w[1] = temp.w[1];
			break;
		case EVLWHSPLAT:
			data.h[0] = temp.h[2];
			data.h[1] = temp.h[2];
			data.h[2] = temp.h[3];
			data.h[3] = temp.h[3];
			break;
		default:
			return -EINVAL;
		}
	}

	if (flags & SW) {
		switch (flags & 0xf0) {
		case E8:
			SWAP(data.v[0], data.v[7]);
			SWAP(data.v[1], data.v[6]);
			SWAP(data.v[2], data.v[5]);
			SWAP(data.v[3], data.v[4]);
			break;
		case E4:

			SWAP(data.v[0], data.v[3]);
			SWAP(data.v[1], data.v[2]);
			SWAP(data.v[4], data.v[7]);
			SWAP(data.v[5], data.v[6]);
			break;
		/* Its half word endian */
		default:
			SWAP(data.v[0], data.v[1]);
			SWAP(data.v[2], data.v[3]);
			SWAP(data.v[4], data.v[5]);
			SWAP(data.v[6], data.v[7]);
			break;
		}
	}

	if (flags & SE) {
		data.w[0] = (s16)data.h[1];
		data.w[1] = (s16)data.h[3];
	}

	/* Store result to memory or update registers */
	if (flags & ST) {
		ret = 0;
		p = addr;
		switch (nb) {
		case 8:
			ret |= __put_user_inatomic(data.v[0], p++);
			ret |= __put_user_inatomic(data.v[1], p++);
			ret |= __put_user_inatomic(data.v[2], p++);
			ret |= __put_user_inatomic(data.v[3], p++);
		case 4:
			ret |= __put_user_inatomic(data.v[4], p++);
			ret |= __put_user_inatomic(data.v[5], p++);
		case 2:
			ret |= __put_user_inatomic(data.v[6], p++);
			ret |= __put_user_inatomic(data.v[7], p++);
		}
		if (unlikely(ret))
			return -EFAULT;
	} else {
		*evr = data.w[0];
		regs->gpr[reg] = data.w[1];
	}

	return 1;
}
#endif /* CONFIG_SPE */

#ifdef CONFIG_VSX
/*
 * Emulate VSX instructions...
 *
 * @addr:   user address of the operand
 * @reg:    VSX register number; values below 32 select the FP register
 *          save area, 32 and above select thread.vr[reg - 32]
 * @areg:   GPR to receive regs->dar when the U flag is set
 * @regs:   register state at the exception
 * @flags:  ST, SW, U and SPLT are examined
 * @length: total number of bytes to transfer
 * @elsize: element size in bytes; with SW set each element is
 *          byte-reversed individually
 *
 * Returns 1 when emulated, -EFAULT if any user access failed.
 */
static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
		       unsigned int areg, struct pt_regs *regs,
		       unsigned int flags, unsigned int length,
		       unsigned int elsize)
{
	char *ptr;
	unsigned long *lptr;
	int ret = 0;
	int sw = 0;
	int i, j;

	flush_vsx_to_thread(current);

	if (reg < 32)
		ptr = (char *) &current->thread.TS_FPR(reg);
	else
		ptr = (char *) &current->thread.vr[reg - 32];

	/* Keep the start of the register image for the fix-ups below */
	lptr = (unsigned long *) ptr;

	if (flags & SW)
		sw = elsize-1;

	/* Copy one element at a time, byte by byte */
	for (j = 0; j < length; j += elsize) {
		for (i = 0; i < elsize; ++i) {
			if (flags & ST)
				ret |= __put_user(ptr[i^sw], addr + i);
			else
				ret |= __get_user(ptr[i^sw], addr + i);
		}
		ptr  += elsize;
		addr += elsize;
	}

	if (ret)
		return -EFAULT;

	if (flags & U)
		regs->gpr[areg] = regs->dar;

	/* Splat load copies the same data to top and bottom 8 bytes */
	if (flags & SPLT)
		lptr[1] = lptr[0];
	/* For 8 byte loads, zero the top 8 bytes */
	else if (!(flags & ST) && (8 == length))
		lptr[1] = 0;

	return 1;
}
#endif

694 695 696 697 698 699 700 701 702
/*
 * Called on alignment exception. Attempts to fixup
 *
 * Return 1 on success
 * Return 0 if unable to handle the interrupt
 * Return -EFAULT if data address is bad
 */

int fix_alignment(struct pt_regs *regs)
L
Linus Torvalds 已提交
703
{
704
	unsigned int instr, nb, flags, instruction = 0;
705 706
	unsigned int reg, areg;
	unsigned int dsisr;
L
Linus Torvalds 已提交
707
	unsigned char __user *addr;
708
	unsigned long p, swiz;
709
	int ret, t;
L
Linus Torvalds 已提交
710
	union {
711
		u64 ll;
L
Linus Torvalds 已提交
712 713 714 715 716 717 718 719 720 721 722 723 724
		double dd;
		unsigned char v[8];
		struct {
			unsigned hi32;
			int	 low32;
		} x32;
		struct {
			unsigned char hi48[6];
			short	      low16;
		} x16;
	} data;

	/*
725 726
	 * We require a complete register set, if not, then our assembly
	 * is broken
L
Linus Torvalds 已提交
727
	 */
728
	CHECK_FULL_REGS(regs);
L
Linus Torvalds 已提交
729 730 731

	dsisr = regs->dsisr;

732 733 734
	/* Some processors don't provide us with a DSISR we can use here,
	 * let's make one up from the instruction
	 */
L
Linus Torvalds 已提交
735
	if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
736 737 738 739
		unsigned long pc = regs->nip;

		if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
			pc ^= 4;
740 741
		if (unlikely(__get_user_inatomic(instr,
						 (unsigned int __user *)pc)))
742
			return -EFAULT;
743 744 745
		if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
			instr = cpu_to_le32(instr);
		dsisr = make_dsisr(instr);
746
		instruction = instr;
L
Linus Torvalds 已提交
747 748 749 750 751
	}

	/* extract the operation and registers from the dsisr */
	reg = (dsisr >> 5) & 0x1f;	/* source/dest register */
	areg = dsisr & 0x1f;		/* register to update */
752 753

#ifdef CONFIG_SPE
754
	if ((instr >> 26) == 0x4) {
755
		PPC_WARN_ALIGNMENT(spe, regs);
756
		return emulate_spe(regs, reg, instr);
757
	}
758 759
#endif

L
Linus Torvalds 已提交
760 761 762 763 764 765 766
	instr = (dsisr >> 10) & 0x7f;
	instr |= (dsisr >> 13) & 0x60;

	/* Lookup the operation in our table */
	nb = aligninfo[instr].len;
	flags = aligninfo[instr].flags;

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784
	/* Byteswap little endian loads and stores */
	swiz = 0;
	if (regs->msr & MSR_LE) {
		flags ^= SW;
		/*
		 * So-called "PowerPC little endian" mode works by
		 * swizzling addresses rather than by actually doing
		 * any byte-swapping.  To emulate this, we XOR each
		 * byte address with 7.  We also byte-swap, because
		 * the processor's address swizzling depends on the
		 * operand size (it xors the address with 7 for bytes,
		 * 6 for halfwords, 4 for words, 0 for doublewords) but
		 * we will xor with 7 and load/store each byte separately.
		 */
		if (cpu_has_feature(CPU_FTR_PPC_LE))
			swiz = 7;
	}

L
Linus Torvalds 已提交
785 786 787
	/* DAR has the operand effective address */
	addr = (unsigned char __user *)regs->dar;

788 789
#ifdef CONFIG_VSX
	if ((instruction & 0xfc00003e) == 0x7c000018) {
790 791 792
		unsigned int elsize;

		/* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
793 794
		reg |= (instruction & 0x1) << 5;
		/* Simple inline decoder instead of a table */
795 796
		/* VSX has only 8 and 16 byte memory accesses */
		nb = 8;
797 798
		if (instruction & 0x200)
			nb = 16;
799 800 801 802 803 804 805

		/* Vector stores in little-endian mode swap individual
		   elements, so process them separately */
		elsize = 4;
		if (instruction & 0x80)
			elsize = 8;

806
		flags = 0;
807 808
		if (regs->msr & MSR_LE)
			flags |= SW;
809 810 811 812 813 814 815 816 817
		if (instruction & 0x100)
			flags |= ST;
		if (instruction & 0x040)
			flags |= U;
		/* splat load needs a special decoder */
		if ((instruction & 0x400) == 0){
			flags |= SPLT;
			nb = 8;
		}
818
		PPC_WARN_ALIGNMENT(vsx, regs);
819
		return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
820 821
	}
#endif
822 823
	/* A size of 0 indicates an instruction we don't support, with
	 * the exception of DCBZ which is handled as a special case here
L
Linus Torvalds 已提交
824
	 */
825
	if (instr == DCBZ) {
826
		PPC_WARN_ALIGNMENT(dcbz, regs);
827
		return emulate_dcbz(regs, addr);
828
	}
829 830 831 832 833 834
	if (unlikely(nb == 0))
		return 0;

	/* Load/Store Multiple instructions are handled in their own
	 * function
	 */
835
	if (flags & M) {
836
		PPC_WARN_ALIGNMENT(multiple, regs);
837 838
		return emulate_multiple(regs, addr, reg, nb,
					flags, instr, swiz);
839
	}
L
Linus Torvalds 已提交
840 841

	/* Verify the address of the operand */
842 843 844 845
	if (unlikely(user_mode(regs) &&
		     !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
				addr, nb)))
		return -EFAULT;
L
Linus Torvalds 已提交
846 847 848

	/* Force the fprs into the save area so we can reference them */
	if (flags & F) {
849 850
		/* userland only */
		if (unlikely(!user_mode(regs)))
L
Linus Torvalds 已提交
851 852 853
			return 0;
		flush_fp_to_thread(current);
	}
854

855
	/* Special case for 16-byte FP loads and stores */
856
	if (nb == 16) {
857
		PPC_WARN_ALIGNMENT(fp_pair, regs);
858
		return emulate_fp_pair(addr, reg, flags);
859 860
	}

861
	PPC_WARN_ALIGNMENT(unaligned, regs);
862

863 864 865
	/* If we are loading, get the data from user space, else
	 * get it from register values
	 */
866
	if (!(flags & ST)) {
L
Linus Torvalds 已提交
867 868
		data.ll = 0;
		ret = 0;
869
		p = (unsigned long) addr;
L
Linus Torvalds 已提交
870 871
		switch (nb) {
		case 8:
872 873 874 875
			ret |= __get_user_inatomic(data.v[0], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[1], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[2], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[3], SWIZ_PTR(p++));
L
Linus Torvalds 已提交
876
		case 4:
877 878
			ret |= __get_user_inatomic(data.v[4], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[5], SWIZ_PTR(p++));
L
Linus Torvalds 已提交
879
		case 2:
880 881
			ret |= __get_user_inatomic(data.v[6], SWIZ_PTR(p++));
			ret |= __get_user_inatomic(data.v[7], SWIZ_PTR(p++));
882
			if (unlikely(ret))
L
Linus Torvalds 已提交
883 884
				return -EFAULT;
		}
885
	} else if (flags & F) {
886
		data.dd = current->thread.TS_FPR(reg);
887 888 889 890 891 892 893 894 895 896 897 898
		if (flags & S) {
			/* Single-precision FP store requires conversion... */
#ifdef CONFIG_PPC_FPU
			preempt_disable();
			enable_kernel_fp();
			cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
			preempt_enable();
#else
			return 0;
#endif
		}
	} else
899 900
		data.ll = regs->gpr[reg];

901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919
	if (flags & SW) {
		switch (nb) {
		case 8:
			SWAP(data.v[0], data.v[7]);
			SWAP(data.v[1], data.v[6]);
			SWAP(data.v[2], data.v[5]);
			SWAP(data.v[3], data.v[4]);
			break;
		case 4:
			SWAP(data.v[4], data.v[7]);
			SWAP(data.v[5], data.v[6]);
			break;
		case 2:
			SWAP(data.v[6], data.v[7]);
			break;
		}
	}

	/* Perform other misc operations like sign extension
920 921
	 * or floating point single precision conversion
	 */
922
	switch (flags & ~(U|SW)) {
923 924
	case LD+SE:	/* sign extending integer loads */
	case LD+F+SE:	/* sign extend for lfiwax */
L
Linus Torvalds 已提交
925 926 927 928
		if ( nb == 2 )
			data.ll = data.x16.low16;
		else	/* nb must be 4 */
			data.ll = data.x32.low32;
929 930
		break;

931
	/* Single-precision FP load requires conversion... */
932 933 934 935 936 937 938 939 940 941
	case LD+F+S:
#ifdef CONFIG_PPC_FPU
		preempt_disable();
		enable_kernel_fp();
		cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
		preempt_enable();
#else
		return 0;
#endif
		break;
L
Linus Torvalds 已提交
942
	}
943 944

	/* Store result to memory or update registers */
L
Linus Torvalds 已提交
945 946
	if (flags & ST) {
		ret = 0;
947
		p = (unsigned long) addr;
L
Linus Torvalds 已提交
948 949
		switch (nb) {
		case 8:
950 951 952 953
			ret |= __put_user_inatomic(data.v[0], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[1], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[2], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[3], SWIZ_PTR(p++));
L
Linus Torvalds 已提交
954
		case 4:
955 956
			ret |= __put_user_inatomic(data.v[4], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[5], SWIZ_PTR(p++));
L
Linus Torvalds 已提交
957
		case 2:
958 959
			ret |= __put_user_inatomic(data.v[6], SWIZ_PTR(p++));
			ret |= __put_user_inatomic(data.v[7], SWIZ_PTR(p++));
L
Linus Torvalds 已提交
960
		}
961
		if (unlikely(ret))
L
Linus Torvalds 已提交
962
			return -EFAULT;
963
	} else if (flags & F)
964
		current->thread.TS_FPR(reg) = data.dd;
965 966 967
	else
		regs->gpr[reg] = data.ll;

L
Linus Torvalds 已提交
968
	/* Update RA as needed */
969
	if (flags & U)
L
Linus Torvalds 已提交
970 971 972 973
		regs->gpr[areg] = regs->dar;

	return 1;
}