[S390] convert/optimize csum_fold() to C

In the meantime gcc generates better code than the old inline assemblies do. Original inline assembly results in: lr %r1,%r2 sr %r3,%r3 lr %r2,%r1 srdl %r2,16 alr %r2,%r3 alr %r1,%r2 srl %r1,16 xilf %r1,65535 llghr %r2,%r1 br %r14 Out of the C code gcc generates this: rll %r1,%r2,16 ar %r1,%r2 srl %r1,16 xilf %r1,65535 llghr %r2,%r1 br %r14 In addition we don't have any static register allocations anymore and gcc is free to shuffle instructions around for better pipeline usage. Signed-off-by: N Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: N Martin Schwidefsky <schwidefsky@de.ibm.com>

[S390] convert/optimize csum_fold() to C
In the meantime gcc generates better code than the old inline assemblies do. Original inline assembly results in: lr %r1,%r2 sr %r3,%r3 lr %r2,%r1 srdl %r2,16 alr %r2,%r3 alr %r1,%r2 srl %r1,16 xilf %r1,65535 llghr %r2,%r1 br %r14 Out of the C code gcc generates this: rll %r1,%r2,16 ar %r1,%r2 srl %r1,16 xilf %r1,65535 llghr %r2,%r1 br %r14 In addition we don't have any static register allocations anymore and gcc is free to shuffle instructions around for better pipeline usage. Signed-off-by: N Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: N Martin Schwidefsky <schwidefsky@de.ibm.com>
04efc3be · Heiko Carstens · Martin Schwidefsky · 05e7ff7d · 04efc3be
隐藏空白更改
内联并排

Showing with 4 addition and 21 deletion

arch/s390/include/asm/checksum.h arch/s390/include/asm/checksum.h +4 -21

未找到文件。
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
 */
 static inline __sum16 csum_fold(__wsum sum)
 {
-#ifndef __s390x__
-	register_pair rp;
+	u32 csum = (__force u32) sum;

-	asm volatile(
-		"	slr	%N1,%N1\n"	/* %0 = H L */
-		"	lr	%1,%0\n"	/* %0 = H L, %1 = H L 0 0 */
-		"	srdl	%1,16\n"	/* %0 = H L, %1 = 0 H L 0 */
-		"	alr	%1,%N1\n"	/* %0 = H L, %1 = L H L 0 */
-		"	alr	%0,%1\n"	/* %0 = H+L+C L+H */
-		"	srl	%0,16\n"	/* %0 = H+L+C */
-		: "+&d" (sum), "=d" (rp) : : "cc");
-#else /* __s390x__ */
-	asm volatile(
-		"	sr	3,3\n"		/* %0 = H*65536 + L */
-		"	lr	2,%0\n"		/* %0 = H L, 2/3 = H L / 0 0 */
-		"	srdl	2,16\n"		/* %0 = H L, 2/3 = 0 H / L 0 */
-		"	alr	2,3\n"		/* %0 = H L, 2/3 = L H / L 0 */
-		"	alr	%0,2\n"		/* %0 = H+L+C L+H */
-		"	srl	%0,16\n"	/* %0 = H+L+C */
-		: "+&d" (sum) : : "cc", "2", "3");
-#endif /* __s390x__ */
-	return (__force __sum16) ~sum;
+	csum += (csum >> 16) + (csum << 16);
+	csum >>= 16;
+	return (__force __sum16) ~csum;
 }

 /*