x86: change x86 to use generic find_next_bit

The versions with inline assembly are in fact slower on the machines I tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid crashing the benchmark. Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size 1...512, for each possible bitmap with one bit set, for each possible offset: find the position of the first bit starting at offset. If you follow ;). Times include setup of the bitmap and checking of the results. Athlon Xeon Opteron 32/64bit x86-specific: 0m3.692s 0m2.820s 0m3.196s / 0m2.480s generic: 0m2.622s 0m1.662s 0m2.100s / 0m1.572s If the bitmap size is not a multiple of BITS_PER_LONG, and no set (cleared) bit is found, find_next_bit (find_next_zero_bit) returns a value outside of the range [0, size]. The generic version always returns exactly size. The generic version also uses unsigned long everywhere, while the x86 versions use a mishmash of int, unsigned (int), long and unsigned long. Using the generic version does give a slightly bigger kernel, though. defconfig: text data bss dec hex filename x86-specific: 4738555 481232 626688 5846475 5935cb vmlinux (32 bit) generic: 4738621 481232 626688 5846541 59360d vmlinux (32 bit) x86-specific: 5392395 846568 724424 6963387 6a40bb vmlinux (64 bit) generic: 5392458 846568 724424 6963450 6a40fa vmlinux (64 bit) Signed-off-by: N Alexander van Heukelum <heukelum@fastmail.fm> Signed-off-by: N Ingo Molnar <mingo@elte.hu>

x86: change x86 to use generic find_next_bit
The versions with inline assembly are in fact slower on the machines I tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid crashing the benchmark. Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size 1...512, for each possible bitmap with one bit set, for each possible offset: find the position of the first bit starting at offset. If you follow ;). Times include setup of the bitmap and checking of the results. Athlon Xeon Opteron 32/64bit x86-specific: 0m3.692s 0m2.820s 0m3.196s / 0m2.480s generic: 0m2.622s 0m1.662s 0m2.100s / 0m1.572s If the bitmap size is not a multiple of BITS_PER_LONG, and no set (cleared) bit is found, find_next_bit (find_next_zero_bit) returns a value outside of the range [0, size]. The generic version always returns exactly size. The generic version also uses unsigned long everywhere, while the x86 versions use a mishmash of int, unsigned (int), long and unsigned long. Using the generic version does give a slightly bigger kernel, though. defconfig: text data bss dec hex filename x86-specific: 4738555 481232 626688 5846475 5935cb vmlinux (32 bit) generic: 4738621 481232 626688 5846541 59360d vmlinux (32 bit) x86-specific: 5392395 846568 724424 6963387 6a40bb vmlinux (64 bit) generic: 5392458 846568 724424 6963450 6a40fa vmlinux (64 bit) Signed-off-by: N Alexander van Heukelum <heukelum@fastmail.fm> Signed-off-by: N Ingo Molnar <mingo@elte.hu>
6fd92b63 · Alexander van Heukelum · Ingo Molnar · 18e413f7 · 6fd92b63 · 6fd92b63
8 changed file
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -77,6 +77,9 @@ config GENERIC_BUG
 	def_bool y
 	depends on BUG

+config GENERIC_FIND_NEXT_BIT
+	def_bool y
+
 config GENERIC_HWEIGHT
 	def_bool y


--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o
 ifeq ($(CONFIG_X86_32),y)
        lib-y += checksum_32.o
        lib-y += strstr_32.o
-        lib-y += bitops_32.o semaphore_32.o string_32.o
+        lib-y += semaphore_32.o string_32.o

        lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else

--- a/arch/x86/lib/bitops_32.c
+++ b/arch/x86/lib/bitops_32.c
-#include <linux/bitops.h>
-#include <linux/module.h>
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset)
-{
-	const unsigned long *p = addr + (offset >> 5);
-	int set = 0, bit = offset & 31, res;
-
-	if (bit) {
-		/*
-		 * Look for nonzero in the first 32 bits:
-		 */
-		__asm__("bsfl %1,%0\n\t"
-			"jne 1f\n\t"
-			"movl $32, %0\n"
-			"1:"
-			: "=r" (set)
-			: "r" (*p >> bit));
-		if (set < (32 - bit))
-			return set + offset;
-		set = 32 - bit;
-		p++;
-	}
-	/*
-	 * No set bit yet, search remaining full words for a bit
-	 */
-	res = find_first_bit (p, size - 32 * (p - addr));
-	return (offset + set + res);
-}
-EXPORT_SYMBOL(find_next_bit);
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset)
-{
-	const unsigned long *p = addr + (offset >> 5);
-	int set = 0, bit = offset & 31, res;
-
-	if (bit) {
-		/*
-		 * Look for zero in the first 32 bits.
-		 */
-		__asm__("bsfl %1,%0\n\t"
-			"jne 1f\n\t"
-			"movl $32, %0\n"
-			"1:"
-			: "=r" (set)
-			: "r" (~(*p >> bit)));
-		if (set < (32 - bit))
-			return set + offset;
-		set = 32 - bit;
-		p++;
-	}
-	/*
-	 * No zero yet, search remaining full bytes for a zero
-	 */
-	res = find_first_zero_bit(p, size - 32 * (p - addr));
-	return (offset + set + res);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
--- a/arch/x86/lib/bitops_64.c
+++ b/arch/x86/lib/bitops_64.c
 #include <linux/bitops.h>

 #undef find_first_zero_bit
-#undef find_next_zero_bit
 #undef find_first_bit
-#undef find_next_bit

 static inline long
 __find_first_zero_bit(const unsigned long * addr, unsigned long size)
@@ -57,39 +55,6 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size)
 	return __find_first_zero_bit (addr, size);
 }

-/**
- * find_next_zero_bit - find the next zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_zero_bit (const unsigned long * addr, long size, long offset)
-{
-	const unsigned long * p = addr + (offset >> 6);
-	unsigned long set = 0;
-	unsigned long res, bit = offset&63;
-
-	if (bit) {
-		/*
-		 * Look for zero in first word
-		 */
-		asm("bsfq %1,%0\n\t"
-		    "cmoveq %2,%0"
-		    : "=r" (set)
-		    : "r" (~(*p >> bit)), "r"(64L));
-		if (set < (64 - bit))
-			return set + offset;
-		set = 64 - bit;
-		p++;
-	}
-	/*
-	 * No zero yet, search remaining full words for a zero
-	 */
-	res = __find_first_zero_bit (p, size - 64 * (p - addr));
-
-	return (offset + set + res);
-}
-
 static inline long
 __find_first_bit(const unsigned long * addr, unsigned long size)
 {
@@ -136,40 +101,7 @@ long find_first_bit(const unsigned long * addr, unsigned long size)
 	return __find_first_bit(addr,size);
 }

-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_bit(const unsigned long * addr, long size, long offset)
-{
-	const unsigned long * p = addr + (offset >> 6);
-	unsigned long set = 0, bit = offset & 63, res;
-
-	if (bit) {
-		/*
-		 * Look for nonzero in the first 64 bits:
-		 */
-		asm("bsfq %1,%0\n\t"
-		    "cmoveq %2,%0\n\t"
-		    : "=r" (set)
-		    : "r" (*p >> bit), "r" (64L));
-		if (set < (64 - bit))
-			return set + offset;
-		set = 64 - bit;
-		p++;
-	}
-	/*
-	 * No set bit yet, search remaining full words for a bit
-	 */
-	res = __find_first_bit (p, size - 64 * (p - addr));
-	return (offset + set + res);
-}
-
 #include <linux/module.h>

-EXPORT_SYMBOL(find_next_bit);
 EXPORT_SYMBOL(find_first_bit);
 EXPORT_SYMBOL(find_first_zero_bit);
-EXPORT_SYMBOL(find_next_zero_bit);
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -306,6 +306,12 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 #undef BIT_ADDR
 #undef ADDR

+unsigned long find_next_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+unsigned long find_next_zero_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+
+
 #ifdef CONFIG_X86_32
 # include "bitops_32.h"
 #else

--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -39,14 +39,6 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
 	return res;
 }

-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bit number to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset);
-
 /**
 * __ffs - find first bit in word.
 * @word: The word to search
@@ -82,14 +74,6 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
 	return x;
 }

-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bit number to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset);
-
 /**
 * ffz - find first zero in word.
 * @word: The word to search

--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -6,9 +6,7 @@
 */

 extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
-extern long find_next_zero_bit(const unsigned long *addr, long size, long offset);
 extern long find_first_bit(const unsigned long *addr, unsigned long size);
-extern long find_next_bit(const unsigned long *addr, long size, long offset);

 /* return index of first bet set in val or max when no bit is set */
 static inline long __scanbit(unsigned long val, unsigned long max)

--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -15,6 +15,8 @@
 #include <asm/byteorder.h>

 #define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+#undef find_next_bit
+#undef find_next_zero_bit

 /**
 * find_next_bit - find the next set bit in a memory region