/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non-dma-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
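
/*
 * For reference (illustrative only): memcpy below implements the usual C
 * entry point
 *
 *	void *memcpy(void *dst, const void *src, size_t len);
 *
 * __copy_user, by contrast, uses the non-standard calling convention
 * described in include/asm-mips/uaccess.h, handing the count of uncopied
 * bytes back in a2 (len).
 */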

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
/* Prefetch type */
#define SRC_PREFETCH 1
#define DST_PREFETCH 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */

#define EXC(insn, type, reg, addr, handler)			\
	.if \mode == LEGACY_MODE;				\
9:		insn reg, addr;					\
		.section __ex_table,"a";			\
		PTR	9b, handler;				\
		.previous;					\
	/* This is assembled in EVA mode */			\
	.else;							\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR	9b, handler;			\
			.previous;				\
		.else;						\
			/*					\
			 *  Still in EVA, but no need for	\
			 * exception handler or EVA insn	\
			 */					\
			insn reg, addr;				\
		.endif;						\
	.endif
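
/*
 * Illustrative sketch (not actual assembler output): in LEGACY_MODE a use
 * such as EXC(lw, LD_INSN, t0, 0(src), .Ll_exc\@) expands to roughly
 *
 *	9:	lw	t0, 0(src)
 *		.section __ex_table, "a"
 *		PTR	9b, .Ll_exc\@
 *		.previous
 *
 * i.e. the plain access plus an __ex_table entry pointing the fault fixup
 * at the supplied handler label.  In EVA mode the user-side accesses are
 * emitted via __BUILD_EVA_INSN (the *e instruction forms) instead.
 */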

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)

#define _PREF(hint, addr, type)						\
	.if \mode == LEGACY_MODE;					\
		PREF(hint, addr);					\
	.else;								\
		.if ((\from == USEROP) && (type == SRC_PREFETCH)) ||	\
		    ((\to == USEROP) && (type == DST_PREFETCH));	\
			/*						\
			 * PREFE has only 9 bits for the offset		\
			 * compared to PREF which has 16, so it may	\
			 * need to use the $at register but this	\
			 * register should remain intact because it's	\
			 * used later on. Therefore use $v1.		\
			 */						\
			.set at=v1;					\
			PREFE(hint, addr);				\
			.set noat;					\
		.else;							\
			PREF(hint, addr);				\
		.endif;							\
	.endif

#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
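
/*
 * Hint values used below: 0 is PREF_LOAD and 1 is PREF_STORE (both
 * non-streamed), so PREFS() is applied to src addresses and PREFD() to
 * dst addresses.
 */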

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)
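
/*
 * Worked example (assuming USE_DOUBLE, i.e. NBYTES == 8):
 *	FIRST(0) == 0,  REST(0) == 7
 *	FIRST(1) == 8,  REST(1) == 15
 *	ADDRMASK == 7
 * FIRST(n)/REST(n) are the offsets handed to LDFIRST/LDREST (and
 * STFIRST/STREST) so each misaligned unit is covered by one ldl/ldr
 * (or sdl/sdr) pair.
 */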

	.text
	.set	noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.align	5

	/*
	 * Macro to build the __copy_user common code
	 * Arguments:
	 * mode : LEGACY_MODE or EVA_MODE
	 * from : Source operand. USEROP or KERNELOP
	 * to   : Destination operand. USEROP or KERNELOP
	 */
	.macro __BUILD_COPY_USER mode, from, to

	/* initialize __memcpy if this is the first time we execute this macro */
	.ifnotdef __memcpy
	.set __memcpy, 1
	.hidden __memcpy /* make sure it does not leak */
	.endif

	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
#define rem t8

	R10KCBARRIER(0(ra))
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	PREFS(	0, 0(src) )
	PREFD(	1, 0(dst) )
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	PREFS(	0, 1*32(src) )
	PREFD(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK
	PREFS(	0, 2*32(src) )
	PREFD(	1, 2*32(dst) )
#ifndef CONFIG_CPU_MIPSR6
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
#else
	or	t0, t0, t1
	bnez	t0, .Lcopy_unaligned_bytes\@
#endif
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	 SRL	t0, len, LOG_NBYTES+3	 # +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
	 and	rem, len, (8*NBYTES-1)	 # rem = len % (8*NBYTES)
	PREFS(	0, 3*32(src) )
	PREFD(	1, 3*32(dst) )
	.align	4
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
	SUB	len, len, 8*NBYTES
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
	LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p8u\@)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p7u\@)
	LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
	LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
	PREFS(	0, 8*32(src) )
	PREFD(	1, 8*32(dst) )
	bne	len, rem, 1b
	 nop

	/*
	 * len == rem == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD( t0, UNIT(0)(src),	.Ll_exc\@)
	LOAD( t1, UNIT(1)(src),	.Ll_exc_copy\@)
	LOAD( t2, UNIT(2)(src),	.Ll_exc_copy\@)
	LOAD( t3, UNIT(3)(src),	.Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@)
	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@)
	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, 0(src), .Ll_exc\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

#ifndef CONFIG_CPU_MIPSR6
	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.	 Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc\@)
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc\@)
	jr	ra
	 move	len, zero
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	R10KCBARRIER(0(ra))
	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
	PREFS(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
	PREFD(	1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
	PREFS(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@)
	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@)
	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@)
	PREFD(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1  # rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u\@)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

#endif /* !CONFIG_CPU_MIPSR6 */
.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES  */
	R10KCBARRIER(0(ra))
#define COPY_BYTE(N)			\
	LOADB(t0, N(src), .Ll_exc\@);	\
	SUB	len, len, 1;		\
	beqz	len, .Ldone\@;		\
	STOREB(t0, N(dst), .Ls_exc_p1\@)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
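	/*
	 * If we reach this point exactly one byte remains: len < NBYTES and
	 * every COPY_BYTE above exits through .Ldone the moment len hits 0,
	 * so the final byte sits at offset NBYTES-2 and is copied with the
	 * return in the branch delay slot.
	 */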
	LOADB(t0, NBYTES-2(src), .Ll_exc\@)
	SUB	len, len, 1
	jr	ra
	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
.Ldone\@:
	jr	ra
	 nop

#ifdef CONFIG_CPU_MIPSR6
.Lcopy_unaligned_bytes\@:
1:
	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
	COPY_BYTE(6)
	COPY_BYTE(7)
	ADD	src, src, 8
	b	1b
	 ADD	dst, dst, 8
#endif /* CONFIG_CPU_MIPSR6 */
	.if __memcpy == 1
	END(memcpy)
	.set __memcpy, 0
	.hidden __memcpy
	.endif

.Ll_exc_copy\@:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOADK	t0, TI_TASK($28)
	 nop
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADB(t1, 0(src), .Ll_exc\@)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc\@:
	LOADK	t0, TI_TASK($28)
	 nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	 nop
	SUB	len, AT, t0		# len number of uncopied bytes
	jr	ra
	 nop

#define SEXC(n)							\
	.set	reorder;			/* DADDI_WAR */ \
.Ls_exc_p ## n ## u\@:						\
	ADD	len, len, n*NBYTES;				\
	jr	ra;						\
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)
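
/*
 * Illustrative note: each SEXC(n) above builds the .Ls_exc_p<n>u handler
 * used when a store faults with n units still unwritten; it winds len
 * forward by n*NBYTES so len again reflects what was not copied.
 */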

.Ls_exc_p1\@:
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc\@:
	jr	ra
	 nop
	.endm

	.align	5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
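	/*
	 * t0 is now non-zero only if [a0, a0 + len) and [a1, a1 + len)
	 * overlap; when it is zero the branch below takes the plain forward
	 * __memcpy path.
	 */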
	beqz	t0, .L__memcpy
	 move	v0, a0				/* return value */
	beqz	a2, .Lr_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	 sltu	t0, a1, a0
	beqz	t0, .Lr_end_bytes_up		# src >= dst
	 nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	 move	a2, zero

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder

	jr	ra
	 move	a2, zero
	END(__rmemcpy)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
	move	v0, dst				/* return value */
.L__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
	/* Legacy Mode, user <-> user */
	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA

/*
 * For EVA we need distinct symbols for reading and writing to user space.
 * This is because we need to use specific EVA instructions to perform the
 * virtual <-> physical translation when a virtual address is actually in user
 * space
 */

/*
 * __copy_from_user (EVA)
 */

LEAF(__copy_from_user_eva)
EXPORT_SYMBOL(__copy_from_user_eva)
	__BUILD_COPY_USER EVA_MODE USEROP KERNELOP
END(__copy_from_user_eva)



/*
 * __copy_to_user (EVA)
 */

LEAF(__copy_to_user_eva)
EXPORT_SYMBOL(__copy_to_user_eva)
__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
END(__copy_to_user_eva)

/*
 * __copy_in_user (EVA)
 */

LEAF(__copy_in_user_eva)
EXPORT_SYMBOL(__copy_in_user_eva)
__BUILD_COPY_USER EVA_MODE USEROP USEROP
END(__copy_in_user_eva)

#endif