1. 21 5月, 2016 1 次提交
    • Z
      lib/GCD.c: use binary GCD algorithm instead of Euclidean · fff7fb0b
      Zhaoxiu Zeng 提交于
      The binary GCD algorithm is based on the following facts:
      	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
      	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
      	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)
      
      Even on x86 machines with reasonable division hardware, the binary
      algorithm runs about 25% faster (80% the execution time) than the
      division-based Euclidian algorithm.
      
      On platforms like Alpha and ARMv6 where division is a function call to
      emulation code, it's even more significant.
      
      There are two variants of the code here, depending on whether a fast
      __ffs (find least significant set bit) instruction is available.  This
      allows the unpredictable branches in the bit-at-a-time shifting loop to
      be eliminated.
      
      If fast __ffs is not available, the "even/odd" GCD variant is used.
      
      I use the following code to benchmark:
      
      	#include <stdio.h>
      	#include <stdlib.h>
      	#include <stdint.h>
      	#include <string.h>
      	#include <time.h>
      	#include <unistd.h>
      
      	#define swap(a, b) \
      		do { \
      			a ^= b; \
      			b ^= a; \
      			a ^= b; \
      		} while (0)
      
      	unsigned long gcd0(unsigned long a, unsigned long b)
      	{
      		unsigned long r;
      
      		if (a < b) {
      			swap(a, b);
      		}
      
      		if (b == 0)
      			return a;
      
      		while ((r = a % b) != 0) {
      			a = b;
      			b = r;
      		}
      
      		return b;
      	}
      
      	unsigned long gcd1(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd2(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	unsigned long gcd3(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      		if (b == 1)
      			return r & -r;
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == 1)
      				return r & -r;
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd4(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      		if (b == r)
      			return r;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == r)
      				return r;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
      		gcd0, gcd1, gcd2, gcd3, gcd4,
      	};
      
      	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))
      
      	#if defined(__x86_64__)
      
      	#define rdtscll(val) do { \
      		unsigned long __a,__d; \
      		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
      		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
      	} while(0)
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		unsigned long long start, end;
      		unsigned long long ret;
      		unsigned long gcd_res;
      
      		rdtscll(start);
      		gcd_res = gcd(a, b);
      		rdtscll(end);
      
      		if (end >= start)
      			ret = end - start;
      		else
      			ret = ~0ULL - start + 1 + end;
      
      		*res = gcd_res;
      		return ret;
      	}
      
      	#else
      
      	static inline struct timespec read_time(void)
      	{
      		struct timespec time;
      		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
      		return time;
      	}
      
      	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
      	{
      		struct timespec temp;
      
      		if ((end.tv_nsec - start.tv_nsec) < 0) {
      			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
      			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
      		} else {
      			temp.tv_sec = end.tv_sec - start.tv_sec;
      			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
      		}
      
      		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
      	}
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		struct timespec start, end;
      		unsigned long gcd_res;
      
      		start = read_time();
      		gcd_res = gcd(a, b);
      		end = read_time();
      
      		*res = gcd_res;
      		return diff_time(start, end);
      	}
      
      	#endif
      
      	static inline unsigned long get_rand()
      	{
      		if (sizeof(long) == 8)
      			return (unsigned long)rand() << 32 | rand();
      		else
      			return rand();
      	}
      
      	int main(int argc, char **argv)
      	{
      		unsigned int seed = time(0);
      		int loops = 100;
      		int repeats = 1000;
      		unsigned long (*res)[TEST_ENTRIES];
      		unsigned long long elapsed[TEST_ENTRIES];
      		int i, j, k;
      
      		for (;;) {
      			int opt = getopt(argc, argv, "n:r:s:");
      			/* End condition always first */
      			if (opt == -1)
      				break;
      
      			switch (opt) {
      			case 'n':
      				loops = atoi(optarg);
      				break;
      			case 'r':
      				repeats = atoi(optarg);
      				break;
      			case 's':
      				seed = strtoul(optarg, NULL, 10);
      				break;
      			default:
      				/* You won't actually get here. */
      				break;
      			}
      		}
      
      		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
      		memset(elapsed, 0, sizeof(elapsed));
      
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			/* Do we have args? */
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			unsigned long long min_elapsed[TEST_ENTRIES];
      			for (k = 0; k < repeats; k++) {
      				for (i = 0; i < TEST_ENTRIES; i++) {
      					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
      					if (k == 0 || min_elapsed[i] > tmp)
      						min_elapsed[i] = tmp;
      				}
      			}
      			for (i = 0; i < TEST_ENTRIES; i++)
      				elapsed[i] += min_elapsed[i];
      		}
      
      		for (i = 0; i < TEST_ENTRIES; i++)
      			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);
      
      		k = 0;
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			for (i = 1; i < TEST_ENTRIES; i++) {
      				if (res[j][i] != res[j][0])
      					break;
      			}
      			if (i < TEST_ENTRIES) {
      				if (k == 0) {
      					k = 1;
      					fprintf(stderr, "Error:\n");
      				}
      				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
      				for (i = 0; i < TEST_ENTRIES; i++)
      					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
      			}
      		}
      
      		if (k == 0)
      			fprintf(stderr, "PASS\n");
      
      		free(res);
      
      		return 0;
      	}
      
      Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:
      
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 10174
        gcd1: elapsed 2120
        gcd2: elapsed 2902
        gcd3: elapsed 2039
        gcd4: elapsed 2812
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9309
        gcd1: elapsed 2280
        gcd2: elapsed 2822
        gcd3: elapsed 2217
        gcd4: elapsed 2710
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9589
        gcd1: elapsed 2098
        gcd2: elapsed 2815
        gcd3: elapsed 2030
        gcd4: elapsed 2718
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9914
        gcd1: elapsed 2309
        gcd2: elapsed 2779
        gcd3: elapsed 2228
        gcd4: elapsed 2709
        PASS
      
      [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
      Signed-off-by: NZhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
      Signed-off-by: NGeorge Spelvin <linux@horizon.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      fff7fb0b
  2. 21 1月, 2016 1 次提交
    • C
      dma-mapping: always provide the dma_map_ops based implementation · e1c7e324
      Christoph Hellwig 提交于
      Move the generic implementation to <linux/dma-mapping.h> now that all
      architectures support it and remove the HAVE_DMA_ATTR Kconfig symbol now
      that everyone supports them.
      
      [valentinrothberg@gmail.com: remove leftovers in Kconfig]
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
      Cc: Chris Metcalf <cmetcalf@ezchip.com>
      Cc: David Howells <dhowells@redhat.com>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
      Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
      Cc: Helge Deller <deller@gmx.de>
      Cc: James Hogan <james.hogan@imgtec.com>
      Cc: Jesper Nilsson <jesper.nilsson@axis.com>
      Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
      Cc: Ley Foon Tan <lftan@altera.com>
      Cc: Mark Salter <msalter@redhat.com>
      Cc: Mikael Starvik <starvik@axis.com>
      Cc: Steven Miao <realmz6@gmail.com>
      Cc: Vineet Gupta <vgupta@synopsys.com>
      Cc: Christian Borntraeger <borntraeger@de.ibm.com>
      Cc: Joerg Roedel <jroedel@suse.de>
      Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
      Signed-off-by: NValentin Rothberg <valentinrothberg@gmail.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      e1c7e324
  3. 20 1月, 2016 2 次提交
  4. 15 12月, 2015 1 次提交
  5. 12 11月, 2015 1 次提交
  6. 23 6月, 2015 2 次提交
  7. 17 9月, 2013 1 次提交
  8. 13 9月, 2013 1 次提交
  9. 23 6月, 2013 1 次提交
  10. 17 4月, 2013 1 次提交
  11. 08 4月, 2013 1 次提交
  12. 15 3月, 2013 1 次提交
    • R
      CONFIG_SYMBOL_PREFIX: cleanup. · b92021b0
      Rusty Russell 提交于
      We have CONFIG_SYMBOL_PREFIX, which three archs define to the string
      "_".  But Al Viro broke this in "consolidate cond_syscall and
      SYSCALL_ALIAS declarations" (in linux-next), and he's not the first to
      do so.
      
      Using CONFIG_SYMBOL_PREFIX is awkward, since we usually just want to
      prefix it so something.  So various places define helpers which are
      defined to nothing if CONFIG_SYMBOL_PREFIX isn't set:
      
      1) include/asm-generic/unistd.h defines __SYMBOL_PREFIX.
      2) include/asm-generic/vmlinux.lds.h defines VMLINUX_SYMBOL(sym)
      3) include/linux/export.h defines MODULE_SYMBOL_PREFIX.
      4) include/linux/kernel.h defines SYMBOL_PREFIX (which differs from #7)
      5) kernel/modsign_certificate.S defines ASM_SYMBOL(sym)
      6) scripts/modpost.c defines MODULE_SYMBOL_PREFIX
      7) scripts/Makefile.lib defines SYMBOL_PREFIX on the commandline if
         CONFIG_SYMBOL_PREFIX is set, so that we have a non-string version
         for pasting.
      
      (arch/h8300/include/asm/linkage.h defines SYMBOL_NAME(), too).
      
      Let's solve this properly:
      1) No more generic prefix, just CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX.
      2) Make linux/export.h usable from asm.
      3) Define VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR().
      4) Make everyone use them.
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      Reviewed-by: NJames Hogan <james.hogan@imgtec.com>
      Tested-by: James Hogan <james.hogan@imgtec.com> (metag)
      b92021b0
  13. 13 3月, 2013 1 次提交
  14. 28 2月, 2013 1 次提交
  15. 14 2月, 2013 1 次提交
    • A
      burying unused conditionals · d64008a8
      Al Viro 提交于
      __ARCH_WANT_SYS_RT_SIGACTION,
      __ARCH_WANT_SYS_RT_SIGSUSPEND,
      __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND,
      __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL - not used anymore
      CONFIG_GENERIC_{SIGALTSTACK,COMPAT_RT_SIG{ACTION,QUEUEINFO,PENDING,PROCMASK}} -
      can be assumed always set.
      d64008a8
  16. 04 2月, 2013 3 次提交
  17. 20 12月, 2012 1 次提交
  18. 19 12月, 2012 1 次提交
    • F
      h8300: select generic atomic64_t support · d95bfe46
      Fengguang Wu 提交于
      Rationales from Eric:
      
      So I just looked a little deeper and it appears architectures that do
      not support atomic64_t are broken.
      
      The generic atomic64 support came in 2009 to support the perf subsystem
      with the expectation that all architectures would implement atomic64
      support.
      
      Furthermore upon inspection of the kernel atomic64_t is used in a fair
      number of places beyond the performance counters:
      
      block/blk-cgroup.c
      drivers/acpi/apei/
      drivers/block/rbd.c
      drivers/crypto/nx/nx.h
      drivers/gpu/drm/radeon/radeon.h
      drivers/infiniband/hw/ipath/
      drivers/infiniband/hw/qib/
      drivers/staging/octeon/
      fs/xfs/
      include/linux/perf_event.h
      include/net/netfilter/nf_conntrack_acct.h
      kernel/events/
      kernel/trace/
      net/mac80211/key.h
      net/rds/
      
      The block control group, infiniband, xfs, crypto, 802.11, netfilter.
      Nothing quite so fundamental as fs/namespace.c but definitely in
      multiplatform-code that should work, and is already broken on those
      architecutres.
      
      Looking at the implementation of atomic64_add_return in lib/atomic64.c the
      code looks as efficient as these kinds of things get.
      
      Which leads me to the conclusion that we need atomic64 support on all
      architectures.
      Signed-off-by: NFengguang Wu <fengguang.wu@intel.com>
      Cc: "Eric W. Biederman" <ebiederm@xmission.com>
      Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      d95bfe46
  19. 15 10月, 2012 2 次提交
  20. 09 10月, 2012 1 次提交
  21. 28 9月, 2012 1 次提交
    • D
      Make most arch asm/module.h files use asm-generic/module.h · 786d35d4
      David Howells 提交于
      Use the mapping of Elf_[SPE]hdr, Elf_Addr, Elf_Sym, Elf_Dyn, Elf_Rel/Rela,
      ELF_R_TYPE() and ELF_R_SYM() to either the 32-bit version or the 64-bit version
      into asm-generic/module.h for all arches bar MIPS.
      
      Also, use the generic definition mod_arch_specific where possible.
      
      To this end, I've defined three new config bools:
      
       (*) HAVE_MOD_ARCH_SPECIFIC
      
           Arches define this if they don't want to use the empty generic
           mod_arch_specific struct.
      
       (*) MODULES_USE_ELF_RELA
      
           Arches define this if their modules can contain RELA records.  This causes
           the Elf_Rela mapping to be emitted and allows apply_relocate_add() to be
           defined by the arch rather than have the core emit an error message.
      
       (*) MODULES_USE_ELF_REL
      
           Arches define this if their modules can contain REL records.  This causes
           the Elf_Rel mapping to be emitted and allows apply_relocate() to be
           defined by the arch rather than have the core emit an error message.
      
      Note that it is possible to allow both REL and RELA records: m68k and mips are
      two arches that do this.
      
      With this, some arch asm/module.h files can be deleted entirely and replaced
      with a generic-y marker in the arch Kbuild file.
      
      Additionally, I have removed the bits from m32r and score that handle the
      unsupported type of relocation record as that's now handled centrally.
      Signed-off-by: NDavid Howells <dhowells@redhat.com>
      Acked-by: NSam Ravnborg <sam@ravnborg.org>
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      786d35d4
  22. 31 7月, 2012 1 次提交
  23. 05 5月, 2012 2 次提交
  24. 12 1月, 2012 1 次提交
  25. 30 10月, 2011 1 次提交
  26. 19 10月, 2011 1 次提交
  27. 13 10月, 2011 1 次提交
  28. 27 5月, 2011 1 次提交
  29. 30 3月, 2011 1 次提交
  30. 25 3月, 2011 1 次提交
  31. 24 3月, 2011 1 次提交
  32. 21 1月, 2011 1 次提交
    • T
      h8300: Use generic irq Kconfig · 51f3f159
      Thomas Gleixner 提交于
      Switch to the generic irq Kconfig. h8300 has all irq chips converted
      to the new functions, so select the GENERIC_HARDIRQS_NO_DEPRECATED
      switch as well. Fixup the resulting fallout in show_interrupts().
      Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
      Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
      Cc: Paul Mundt <lethal@linux-sh.org>
      51f3f159
  33. 20 9月, 2010 1 次提交
  34. 27 7月, 2010 1 次提交