[ARM] Improve csum_fold, cleanup csum_tcpudp_magic()

csum_fold doesn't need two assembly instructions to perform its task; it can simply add the high and low parts together by rotating by 16 bits, and the carry into the upper 16 bits will happen automatically. Also, since csum_tcpudp_magic() is just csum_tcpudp_nofold() + csum_fold(), use those two functions to achieve this. Also note that there is a csum_fold() at the end of ip_fast_csum() as well, so use the real csum_fold() there too. Boot tested on Versatile. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

[ARM] Improve csum_fold, cleanup csum_tcpudp_magic()
csum_fold doesn't need two assembly instructions to perform its task; it can simply add the high and low parts together by rotating by 16 bits, and the carry into the upper 16 bits will happen automatically. Also, since csum_tcpudp_magic() is just csum_tcpudp_nofold() + csum_fold(), use those two functions to achieve this. Also note that there is a csum_fold() at the end of ip_fast_csum() as well, so use the real csum_fold() there too. Boot tested on Versatile. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
7ef416c4 · Russell King · Russell King · 10c03f69 · 7ef416c4
隐藏空白更改
内联并排

Showing 1 changed file with 18 additions and 38 deletions

include/asm-arm/checksum.h include/asm-arm/checksum.h +18 -38

未找到文件。
--- a/include/asm-arm/checksum.h
+++ b/include/asm-arm/checksum.h
@@ -39,6 +39,19 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
 __wsum
 csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);

+/*
+ *	Fold and complement a partial checksum, without adding pseudo headers
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	__asm__(
+	"add	%0, %1, %1, ror #16	@ csum_fold"
+	: "=r" (sum)
+	: "r" (sum)
+	: "cc");
+	return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
 /*
 *	This is a version of ip_compute_csum() optimized for IP headers,
 *	which always checksum on 4 octet boundaries.
@@ -46,7 +59,8 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum s
 static inline __sum16
 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-	unsigned int sum, tmp1;
+	unsigned int tmp1;
+	__wsum sum;

 	__asm__ __volatile__(
 	"ldr	%0, [%1], #4		@ ip_fast_csum		\n\
@@ -62,29 +76,11 @@ ip_fast_csum(const void *iph, unsigned int ihl)
 	subne	%2, %2, #1		@ without destroying	\n\
 	bne	1b			@ the carry flag	\n\
 	adcs	%0, %0, %3					\n\
-	adc	%0, %0, #0					\n\
-	adds	%0, %0, %0, lsl #16				\n\
-	addcs	%0, %0, #0x10000				\n\
-	mvn	%0, %0						\n\
-	mov	%0, %0, lsr #16"
+	adc	%0, %0, #0"
 	: "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (tmp1)
 	: "1" (iph), "2" (ihl)
 	: "cc", "memory");
-	return (__force __sum16)sum;
-}
-
-/*
- * 	Fold a partial checksum without adding pseudo headers
- */
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__(
-	"adds	%0, %1, %1, lsl #16	@ csum_fold		\n\
-	addcs	%0, %0, #0x10000"
-	: "=r" (sum)
-	: "r" (sum)
-	: "cc");
-	return (__force __sum16)(~(__force u32)sum >> 16);
+	return csum_fold(sum);
 }

 static inline __wsum
@@ -114,23 +110,7 @@ static inline __sum16
 csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
 		  unsigned short proto, __wsum sum)
 {
-	__asm__(
-	"adds	%0, %1, %2		@ csum_tcpudp_magic	\n\
-	adcs	%0, %0, %3					\n"
-#ifdef __ARMEB__
-	"adcs	%0, %0, %4					\n"
-#else
-	"adcs	%0, %0, %4, lsl #8				\n"
-#endif
-	"adcs	%0, %0, %5					\n\
-	adc	%0, %0, #0					\n\
-	adds	%0, %0, %0, lsl #16				\n\
-	addcs	%0, %0, #0x10000				\n\
-	mvn	%0, %0"
-	: "=&r"(sum)
-	: "r" (sum), "r" (daddr), "r" (saddr), "r" (len), "Ir" (htons(proto))
-	: "cc");
-	return (__force __sum16)((__force u32)sum >> 16);
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }