提交 32ee1e18 编写于 作者: Anton Blanchard 提交者: Benjamin Herrenschmidt

powerpc: Fix endian issues in VMX copy loops

Fix the permute loops for little endian.
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
上级 8b5ede69
......@@ -19,6 +19,14 @@
*/
#include <asm/ppc_asm.h>
/*
 * Endian-neutral wrappers for the unaligned-copy permute setup.
 * Big endian: lvsl generates the shift-left permute vector and vperm
 * takes the source quadwords in (VRA,VRB) order.
 * Little endian: lvsr must be used instead, and the two vperm source
 * operands swapped, so the same loop body reassembles bytes in the
 * correct order on either endianness (the purpose of this patch).
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif
.macro err1
100:
.section __ex_table,"a"
......@@ -552,13 +560,13 @@ err3; stw r7,4(r3)
li r10,32
li r11,48
lvsl vr16,0,r4 /* Setup permute control vector */
LVS(vr16,0,r4) /* Setup permute control vector */
err3; lvx vr0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
err3; lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
err3; stvx vr8,r0,r3
addi r3,r3,16
......@@ -566,9 +574,9 @@ err3; stvx vr8,r0,r3
5: bf cr7*4+2,6f
err3; lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
err3; lvx vr0,r4,r9
vperm vr9,vr1,vr0,vr16
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
......@@ -576,13 +584,13 @@ err3; stvx vr9,r3,r9
6: bf cr7*4+1,7f
err3; lvx vr3,r0,r4
vperm vr8,vr0,vr3,vr16
VPERM(vr8,vr0,vr3,vr16)
err3; lvx vr2,r4,r9
vperm vr9,vr3,vr2,vr16
VPERM(vr9,vr3,vr2,vr16)
err3; lvx vr1,r4,r10
vperm vr10,vr2,vr1,vr16
VPERM(vr10,vr2,vr1,vr16)
err3; lvx vr0,r4,r11
vperm vr11,vr1,vr0,vr16
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
......@@ -611,21 +619,21 @@ err3; stvx vr11,r3,r11
.align 5
8:
err4; lvx vr7,r0,r4
vperm vr8,vr0,vr7,vr16
VPERM(vr8,vr0,vr7,vr16)
err4; lvx vr6,r4,r9
vperm vr9,vr7,vr6,vr16
VPERM(vr9,vr7,vr6,vr16)
err4; lvx vr5,r4,r10
vperm vr10,vr6,vr5,vr16
VPERM(vr10,vr6,vr5,vr16)
err4; lvx vr4,r4,r11
vperm vr11,vr5,vr4,vr16
VPERM(vr11,vr5,vr4,vr16)
err4; lvx vr3,r4,r12
vperm vr12,vr4,vr3,vr16
VPERM(vr12,vr4,vr3,vr16)
err4; lvx vr2,r4,r14
vperm vr13,vr3,vr2,vr16
VPERM(vr13,vr3,vr2,vr16)
err4; lvx vr1,r4,r15
vperm vr14,vr2,vr1,vr16
VPERM(vr14,vr2,vr1,vr16)
err4; lvx vr0,r4,r16
vperm vr15,vr1,vr0,vr16
VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
err4; stvx vr8,r0,r3
err4; stvx vr9,r3,r9
......@@ -649,13 +657,13 @@ err4; stvx vr15,r3,r16
bf cr7*4+1,9f
err3; lvx vr3,r0,r4
vperm vr8,vr0,vr3,vr16
VPERM(vr8,vr0,vr3,vr16)
err3; lvx vr2,r4,r9
vperm vr9,vr3,vr2,vr16
VPERM(vr9,vr3,vr2,vr16)
err3; lvx vr1,r4,r10
vperm vr10,vr2,vr1,vr16
VPERM(vr10,vr2,vr1,vr16)
err3; lvx vr0,r4,r11
vperm vr11,vr1,vr0,vr16
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
......@@ -665,9 +673,9 @@ err3; stvx vr11,r3,r11
9: bf cr7*4+2,10f
err3; lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
err3; lvx vr0,r4,r9
vperm vr9,vr1,vr0,vr16
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
......@@ -675,7 +683,7 @@ err3; stvx vr9,r3,r9
10: bf cr7*4+3,11f
err3; lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
err3; stvx vr8,r0,r3
addi r3,r3,16
......
......@@ -20,6 +20,15 @@
#include <asm/ppc_asm.h>
_GLOBAL(memcpy_power7)
/*
 * Same endian-neutral LVS/VPERM wrappers as in the copyuser loop:
 * lvsl with natural vperm operand order on big endian, lvsr with the
 * vperm sources swapped on little endian, so one shared loop body
 * produces correctly ordered destination quadwords either way.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif
#ifdef CONFIG_ALTIVEC
cmpldi r5,16
cmpldi cr1,r5,4096
......@@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7)
li r10,32
li r11,48
lvsl vr16,0,r4 /* Setup permute control vector */
LVS(vr16,0,r4) /* Setup permute control vector */
lvx vr0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
......@@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7)
5: bf cr7*4+2,6f
lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
vperm vr9,vr1,vr0,vr16
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
......@@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7)
6: bf cr7*4+1,7f
lvx vr3,r0,r4
vperm vr8,vr0,vr3,vr16
VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
vperm vr9,vr3,vr2,vr16
VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
vperm vr10,vr2,vr1,vr16
VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
vperm vr11,vr1,vr0,vr16
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
......@@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7)
.align 5
8:
lvx vr7,r0,r4
vperm vr8,vr0,vr7,vr16
VPERM(vr8,vr0,vr7,vr16)
lvx vr6,r4,r9
vperm vr9,vr7,vr6,vr16
VPERM(vr9,vr7,vr6,vr16)
lvx vr5,r4,r10
vperm vr10,vr6,vr5,vr16
VPERM(vr10,vr6,vr5,vr16)
lvx vr4,r4,r11
vperm vr11,vr5,vr4,vr16
VPERM(vr11,vr5,vr4,vr16)
lvx vr3,r4,r12
vperm vr12,vr4,vr3,vr16
VPERM(vr12,vr4,vr3,vr16)
lvx vr2,r4,r14
vperm vr13,vr3,vr2,vr16
VPERM(vr13,vr3,vr2,vr16)
lvx vr1,r4,r15
vperm vr14,vr2,vr1,vr16
VPERM(vr14,vr2,vr1,vr16)
lvx vr0,r4,r16
vperm vr15,vr1,vr0,vr16
VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
stvx vr8,r0,r3
stvx vr9,r3,r9
......@@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7)
bf cr7*4+1,9f
lvx vr3,r0,r4
vperm vr8,vr0,vr3,vr16
VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
vperm vr9,vr3,vr2,vr16
VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
vperm vr10,vr2,vr1,vr16
VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
vperm vr11,vr1,vr0,vr16
VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
......@@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7)
9: bf cr7*4+2,10f
lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
vperm vr9,vr1,vr0,vr16
VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
......@@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7)
10: bf cr7*4+3,11f
lvx vr1,r0,r4
vperm vr8,vr0,vr1,vr16
VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册