diff --git a/arch/Kconfig b/arch/Kconfig index 8f84fd268dee6a2c4bf3792352c48ff11e2e6222..b16e74e4b5afbbcfad4091c4067c638d2e50da3c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -647,4 +647,7 @@ config COMPAT_OLD_SIGACTION config ARCH_NO_COHERENT_DMA_MMAP bool +config CPU_NO_EFFICIENT_FFS + def_bool n + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index fe99f894e57d647f36c18177311eb1355b6332ff..7f312d80b43b4327611295a8506f1894754b4544 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -26,6 +26,7 @@ config ALPHA select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8894f7e7e3de2a6e6cc1947d93a9359be961c699..0dcbacfdea4b98ca7805624a5ffb59b9298c4992 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -107,6 +107,7 @@ choice config ISA_ARCOMPACT bool "ARCompact ISA" + select CPU_NO_EFFICIENT_FFS help The original ARC ISA of ARC600/700 cores diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 55347662e5edafa0900b54987b219a5fb83a6b5a..cb569b65a54ddcdaa8cce08d047799a24ba75f4b 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -421,18 +421,21 @@ config CPU_32v3 select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v5 bool diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 986ea84caaed48a9fd4d16a7b1195a7b97b87eb5..aa232de2d4bc854a7c25b73a538dc7bcf7df755e 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -20,6 +20,7 @@ config H8300 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_ARCH_KGDB + select CPU_NO_EFFICIENT_FFS config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index c82b2925399116e87128e0c187fde3d9e7aadb75..3cc8498fe0fe8c983031c73a72c3b2c168f92218 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -17,6 +17,7 @@ config M32R select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select CPU_NO_EFFICIENT_FFS config SBUS bool diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index c1beb5ae181fa61648fab7f44040c940c8372aa1..8ace920ca24aaf1d491db0f7038a60e2bbc58237 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -40,6 +40,7 @@ config M68000 select CPU_HAS_NO_MULDIV64 select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM + select CPU_NO_EFFICIENT_FFS help The Freescale (was Motorola) 68000 CPU is the first generation of the well known M68K family of processors. The CPU core as well as @@ -51,6 +52,7 @@ config MCPU32 bool select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_UNALIGNED + select CPU_NO_EFFICIENT_FFS help The Freescale (was then Motorola) CPU32 is a CPU core that is based on the 68020 processor. For the most part it is used in @@ -130,6 +132,7 @@ config M5206 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206 processor support. @@ -138,6 +141,7 @@ config M5206e depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206e processor support. @@ -163,6 +167,7 @@ config M5249 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5249 processor support. @@ -171,6 +176,7 @@ config M525x depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale (Motorola) Coldfire 5251/5253 processor support. @@ -189,6 +195,7 @@ config M5272 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5272 processor support. @@ -217,6 +224,7 @@ config M5307 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5307 processor support. @@ -242,6 +250,7 @@ config M5407 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5407 processor support. @@ -251,6 +260,7 @@ config M547x select MMU_COLDFIRE if MMU select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support. @@ -260,6 +270,7 @@ config M548x select M54xx select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support. diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index e47a08d72819651446cf937ce54333792666cf58..5b7a45d99cfbfd5397554b399a6ec2bd8e4b5582 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -30,6 +30,7 @@ config METAG select OF select OF_EARLY_FLATTREE select SPARSE_IRQ + select CPU_NO_EFFICIENT_FFS config STACKTRACE_SUPPORT def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3d793b55f60c04b3b18cdf01b20a1d7c6964e65a..f17c3a4fb697d9b40752c235a43fe2fd65349db9 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -32,6 +32,7 @@ config MICROBLAZE select OF_EARLY_FLATTREE select TRACING_SUPPORT select VIRT_TO_BUS + select CPU_NO_EFFICIENT_FFS config SWAP def_bool n diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index e6f19fc61bf254d6fa05fdbec1164f76dca11c39..e961c8a7ea6626a1b044b366a99c5201e6d5beb3 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -204,6 +204,16 @@ #endif #endif +/* __builtin_constant_p(cpu_has_mips_r) && cpu_has_mips_r */ +#if !((defined(cpu_has_mips32r1) && cpu_has_mips32r1) || \ + (defined(cpu_has_mips32r2) && cpu_has_mips32r2) || \ + (defined(cpu_has_mips32r6) && cpu_has_mips32r6) || \ + (defined(cpu_has_mips64r1) && cpu_has_mips64r1) || \ + (defined(cpu_has_mips64r2) && cpu_has_mips64r2) || \ + (defined(cpu_has_mips64r6) && cpu_has_mips64r6)) +#define CPU_NO_EFFICIENT_FFS 1 +#endif + #ifndef cpu_has_mips_1 # define cpu_has_mips_1 (!cpu_has_mips_r6) #endif diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 87ca653eb5f3452558ea89c53d641f78dd370a62..51a56c8b04b451ce140a5708b4d5ca57bc7a6404 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -15,6 +15,7 @@ config NIOS2 select SOC_BUS select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT + select CPU_NO_EFFICIENT_FFS config GENERIC_CSUM def_bool y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e118c02cc79a085eb59e10513bbf15c0272725a3..142cb057c41bd3218cb4a3634335452e161f172c 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -25,6 +25,7 @@ config OPENRISC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select OR1K_PIC + select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 88cfaa8af78ea2af3192472920100a4ac1de8207..3d498a676551d0fa778f0bafe0cbced3fe44765d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select ARCH_NO_COHERENT_DMA_MMAP + select CPU_NO_EFFICIENT_FFS help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c3c43d9d1b54f5d828c45f272e7dc0be882024f..a8c259059adf0e7b6af3587d59d2d370e0b0cd5d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_EARLY_PFN_TO_NID select HAVE_ARCH_JUMP_LABEL + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 366e1b599a7b7849b74f17f19308def9028cc971..507d6318138941aa5dec6cc767e992ca598e009f 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -14,6 +14,7 @@ config SCORE select VIRT_TO_BUS select MODULES_USE_ELF_REL select CLONE_BACKWARDS + select CPU_NO_EFFICIENT_FFS choice prompt "System type" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f6254341c065da7ac4606b02a342c13b70b8e243..e803a836cb7ca81e9e1546649cc15e2e350dd88c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -20,6 +20,7 @@ config SUPERH select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK select HAVE_KERNEL_GZIP + select CPU_NO_EFFICIENT_FFS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_XZ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 1012f7ffcdf5f5486ffe67ec185c356509d6f6b6..546293d9e6c52906f2c1f72d1a9545749595f994 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,6 +42,7 @@ config SPARC select ODD_RT_SIGACTION select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN + select CPU_NO_EFFICIENT_FFS config SPARC32 def_bool !64BIT diff --git a/lib/gcd.c b/lib/gcd.c index 3657f129d7b8c5db0bee11a94a97d5d4bf251b52..135ee6407a5e4f0a6f7fc8da85f38a74ea37ed38 100644 --- a/lib/gcd.c +++ b/lib/gcd.c @@ -2,20 +2,77 @@ #include #include -/* Greatest common divisor */ +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears in a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ unsigned long gcd(unsigned long a, unsigned long b) { - unsigned long r; + unsigned long r = a | b; + + if (!a || !b) + return r; - if (a < b) - swap(a, b); + b >>= __ffs(b); + if (b == 1) + return r & -r; - if (!b) - return a; - while ((r = a % b) != 0) { - a = b; - b = r; + for (;;) { + a >>= __ffs(a); + if (a == 1) + return r & -r; + if (a == b) + return a << __ffs(r); + + if (a < b) + swap(a, b); + a -= b; } - return b; } + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(b & r)) + b >>= 1; + if (b == r) + return r; + + for (;;) { + while (!(a & r)) + a >>= 1; + if (a == r) + return r; + if (a == b) + return a; + + if (a < b) + swap(a, b); + a -= b; + a >>= 1; + if (a & r) + a += b; + a >>= 1; + } +} + +#endif + EXPORT_SYMBOL_GPL(gcd);