/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>


	ALIGN
/*
 * copy_page_rep - copy one 4096-byte page with a single string instruction.
 *
 * In:    %rdi = destination, %rsi = source (the implicit operands of movsq)
 * Clobb: %rcx (counted down to zero by the rep prefix), %rsi, %rdi
 *
 * NOTE(review): rep movsq honours the direction flag; this code assumes DF
 * is clear on entry so the copy runs upwards - confirm against the callers.
 */
copy_page_rep:
	CFI_STARTPROC
	movl	$4096/8, %ecx		/* 512 quadwords = 4096 bytes */
	rep	movsq
	ret
	CFI_ENDPROC
ENDPROC(copy_page_rep)

/*
 * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
 * Could vary the prefetch distance based on SMP/UP.
 */

/*
 * copy_page - copy one 4096-byte page through general-purpose registers.
 *
 * In:    %rdi = destination, %rsi = source
 * Clobb: %rax, %rcx, %rdx, %r8-%r11, flags; %rsi and %rdi each finish
 *        4096 bytes past their entry values.
 * Stack: 2*8 bytes, used to save %rbx and %r12, which are restored before
 *        returning.
 *
 * The page is moved as 64 blocks of 64 bytes.  The first (4096/64)-5 = 59
 * blocks also prefetch the source 5 blocks (5*64 bytes) ahead; the last 5
 * blocks are copied by a second loop without the prefetch, so no prefetch
 * address ever lies beyond the end of the source page.
 *
 * In both loops the counter is decremented at the top and tested by the
 * jnz at the bottom.  That only works because every instruction in between
 * (mov, prefetcht0, lea) leaves the flags untouched - do not insert a
 * flag-writing instruction (e.g. add) between the dec and the jnz.
 */
ENTRY(copy_page)
	CFI_STARTPROC
	subq	$2*8,	%rsp
	CFI_ADJUST_CFA_OFFSET 2*8
	movq	%rbx,	(%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12,	1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8

	/* movl zero-extends into %rcx, so the 64-bit dec below sees 59. */
	movl	$(4096/64)-5,	%ecx
	.p2align 4
.Loop64:
	dec	%rcx			/* sets ZF for the jnz at the bottom */
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* source block 5 iterations ahead */

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	/* lea rather than add: advances the pointers without writing flags */
	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	/* Remaining 5 blocks: same copy, no prefetch. */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx			/* sets ZF for the jnz at the bottom */

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	/* Restore the two saved registers and release the stack slots. */
	movq	(%rsp), %rbx
	CFI_RESTORE rbx
	movq	1*8(%rsp), %r12
	CFI_RESTORE r12
	addq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
	/* End marker: the alternatives entry uses it to size copy_page. */
.Lcopy_page_end:
	CFI_ENDPROC
ENDPROC(copy_page)

	/*
	 * Some CPUs run faster using the string copy instructions.
	 * It is also a lot simpler. Use this when possible.
	 */

#include <asm/cpufeature.h>

	/*
	 * Replacement code: a two-byte short jump (opcode 0xeb followed by
	 * an 8-bit displacement).  The displacement is the distance from
	 * copy_page to copy_page_rep minus the jump's own length (2f - 1b),
	 * i.e. it is computed as if the jump were located at copy_page, so
	 * it lands on copy_page_rep once placed there.  The distance must
	 * fit in a signed byte.
	 */
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb					/* jmp <disp8> */
	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
2:
	.previous
	/*
	 * Record the patch site: original code at copy_page of length
	 * .Lcopy_page_end-copy_page, replacement at 1b of length 2b-1b,
	 * keyed on X86_FEATURE_REP_GOOD.
	 * NOTE(review): argument meanings (including the trailing 0) are
	 * inferred from the values passed; altinstruction_entry is defined
	 * outside this file - confirm against <asm/alternative-asm.h>.
	 */
	.section .altinstructions,"a"
	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
		.Lcopy_page_end-copy_page, 2b-1b, 0
	.previous