/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

	.macro ALTERNATIVE_JUMP feature,orig,alt
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9	             /* near jump with 32bit immediate */
	.long \alt-1b /* offset */   /* or alternatively to alt */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad  0b
	.quad  2b
	.byte  \feature		     /* when feature is set */
	.byte  5		     /* length of original insn (jmp rel32) */
	.byte  5		     /* length of replacement insn */
	.previous
	.endm
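
/*
 * How ALTERNATIVE_JUMP works, roughly (field names below are
 * illustrative; the authoritative layout is struct alt_instr in
 * <asm/alternative.h>): the macro emits a 5-byte "jmp \orig" inline
 * plus one record in .altinstructions:
 *
 *	quad	address of the original jmp (0b)
 *	quad	address of the replacement jmp (2b)
 *	byte	CPU feature bit to test
 *	byte	length of the original insn (jmp rel32 = 5)
 *	byte	length of the replacement (also 5)
 *
 * At boot, apply_alternatives() copies the replacement over the
 * original jmp on CPUs that have the feature bit set, so the copy
 * variant is selected once instead of being tested on every call.
 */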

/* Standard copy_to_user with segment limit checking */		
ENTRY(copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc  bad_to_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae bad_to_user
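	/*
	 * The check above, as a rough C sketch (not the literal
	 * generated code):
	 *
	 *	if (dst + len < dst)		wrapped -> jc
	 *		goto bad_to_user;
	 *	if (dst + len > current_thread_info()->addr_limit)
	 *		goto bad_to_user;
	 */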
	xorl %ecx,%ecx	/* clear zero flag; it lives in %ecx, not %eax */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

ENTRY(copy_user_generic)
	CFI_STARTPROC
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
	CFI_STARTPROC
	xorl %ecx,%ecx	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
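
/*
 * Note on the %ecx "zero flag" used above: the generic copy routines
 * take a flag in %ecx that says whether to zero the uncopied tail of
 * the destination when a fault cuts the copy short.  copy_from_user()
 * and copy_user_generic() set it, so kernel buffers are never left
 * with stale data; copy_to_user() and __copy_from_user_inatomic()
 * clear it, since their callers handle (or don't need) the zeroing.
 */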

/* Standard copy_from_user with segment limit checking */	
ENTRY(copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc  bad_from_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae  bad_from_user
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(copy_from_user)
	
	.section .fixup,"ax"
	/* must zero dest */
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx		/* whole count is still uncopied */
	xorl %eax,%eax
	rep
	stosb			/* zero the kernel destination */
bad_to_user:
	movl	%edx,%eax	/* return bytes not copied */
	ret
	CFI_ENDPROC
END(bad_from_user)
	.previous
	
		
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 * 	
 * Input:	
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true zero destination on error
 *
 * Output:		
 * eax uncopied bytes or 0 if successful.
 */
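
/*
 * As a rough C-level view of the contract (a sketch; callers use the
 * asm interface above directly):
 *
 *	unsigned long copy_user_generic_unrolled(void *dst,
 *			const void *src, unsigned len, int zerorest);
 *
 * returns the number of bytes that could not be copied, 0 on success;
 * "zerorest" is the %ecx zero flag described above.
 */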
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0
	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif
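
	/*
	 * Main strategy: copy 64-byte chunks (eight quadwords per
	 * iteration, loads grouped ahead of stores so cache misses
	 * overlap), then up to seven trailing quadwords, then up to
	 * seven trailing bytes.  %rcx below keeps the original byte
	 * count for the tail and the exception handlers.
	 */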

	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js   .Lhandle_tail

	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)

	decq %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d		/* bytes needed to align %rdi */
	movl %r9d,%ecx
	cmpq %r9,%rdx		/* count too small to bother aligning? */
	jz   .Lhandle_7
	js   .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif

	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous
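
	/*
	 * Each pair above is (faulting insn, fixup address): when a
	 * load or store in the copy takes a page fault, the trap
	 * handler looks the faulting %rip up in __ex_table and
	 * resumes at the fixup, which works out how many bytes were
	 * left and, if the zero flag was set, clears the rest of the
	 * destination.
	 */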

	/* Compute the 64-byte offset for the main loop. Accurate to 8
	   bytes, with the error on the pessimistic side. This is gross;
	   it would be better to fix the interface. */
	/* eax: zero, ebx: 64 */
.Ls1e: 	addl $8,%eax
.Ls2e: 	addl $8,%eax
.Ls3e: 	addl $8,%eax
.Ls4e: 	addl $8,%eax
.Ls5e: 	addl $8,%eax
.Ls6e: 	addl $8,%eax
.Ls7e: 	addl $8,%eax
.Ls8e: 	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi  /* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp .Lzero_rest
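
	/*
	 * Worked example (a sketch): a fault at .Ls5 enters at .Ls5e,
	 * so %eax accumulates 4*8 = 32 uncopied bytes of the current
	 * chunk.  %rdi is advanced by 64-32 = 32, past the four
	 * quadwords that did reach the destination, and %rdx is
	 * rebuilt as 64 bytes per chunk not yet started, plus those
	 * 32, plus the sub-64 tail of the original count.
	 */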

	/* exception on quad word loop in tail handling */
	/* ecx:	loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
	cmpl $0,(%rsp)		/* zero flag (saved %rcx) clear? */
	jz   .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)


	/* Some CPUs run faster using the string copy instructions.
	   This is also a lot simpler. Use them when possible.
	   Patch in jmps to this code instead of copying it fully
	   to avoid unwanted aliasing in the exception tables. */

 /* rdi	destination
  * rsi source
  * rdx count
  * ecx zero flag
  *
  * Output:
  * eax uncopied bytes or 0 if successful.
  *
  * Only 4GB of copy is supported. This shouldn't be a problem
  * because the kernel normally only writes from/to page sized chunks
  * even if user space passed a longer buffer.
  * Supporting more would also be dangerous: both Intel and AMD have
  * errata with rep movsq > 4GB. Anyone who feels the need to fix
  * this should keep those errata in mind.
  */
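
/*
 * The fast path is, in effect (a rough sketch):
 *
 *	copy len/8 quadwords with rep movsq,
 *	then len%8 bytes with rep movsb,
 *	return 0.
 *
 * The exception handlers below reconstruct the uncopied byte count
 * from the rep counters when either string instruction faults.
 */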
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	movl %ecx,%r8d		/* save zero flag */
	movl %edx,%ecx
	shrl $3,%ecx		/* quadwords to copy */
	andl $7,%edx		/* trailing bytes */
	jz   10f		/* count is a multiple of 8 */
1:	rep 
	movsq 
	movl %edx,%ecx
2:	rep
	movsb
9:	movl %ecx,%eax
	ret

	/* count is a multiple of 8 bytes */
10:	rep
	movsq
	xor %eax,%eax
	ret

	/* exception handling */
3:      lea (%rdx,%rcx,8),%rax	/* quad loop fault: 8*quads left + tail */
	jmp 6f
5:	movl %ecx,%eax		/* exception on byte loop */
	/* eax: left over bytes */
6:	testl %r8d,%r8d		/* zero flag set? */
	jz 7f
	movl %eax,%ecx		/* initialize x86 loop counter */
	push %rax
	xorl %eax,%eax
8:	rep
	stosb 			/* zero the rest */
11:	pop %rax
7:	ret
	CFI_ENDPROC
END(copy_user_generic_string)

	.section __ex_table,"a"
	.quad 1b,3b	/* fault in quad copy */
	.quad 2b,5b	/* fault in byte copy */
	.quad 8b,11b	/* fault while zeroing the rest */
	.quad 10b,3b	/* fault in multiple-of-8 quad copy */
	.previous