1. 21 5月, 2016 1 次提交
    • Z
      lib/GCD.c: use binary GCD algorithm instead of Euclidean · fff7fb0b
      Zhaoxiu Zeng 提交于
      The binary GCD algorithm is based on the following facts:
      	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
      	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
      	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)
      
      Even on x86 machines with reasonable division hardware, the binary
      algorithm runs about 25% faster (80% the execution time) than the
      division-based Euclidian algorithm.
      
      On platforms like Alpha and ARMv6 where division is a function call to
      emulation code, it's even more significant.
      
      There are two variants of the code here, depending on whether a fast
      __ffs (find least significant set bit) instruction is available.  This
      allows the unpredictable branches in the bit-at-a-time shifting loop to
      be eliminated.
      
      If fast __ffs is not available, the "even/odd" GCD variant is used.
      
      I use the following code to benchmark:
      
      	#include <stdio.h>
      	#include <stdlib.h>
      	#include <stdint.h>
      	#include <string.h>
      	#include <time.h>
      	#include <unistd.h>
      
      	#define swap(a, b) \
      		do { \
      			a ^= b; \
      			b ^= a; \
      			a ^= b; \
      		} while (0)
      
      	unsigned long gcd0(unsigned long a, unsigned long b)
      	{
      		unsigned long r;
      
      		if (a < b) {
      			swap(a, b);
      		}
      
      		if (b == 0)
      			return a;
      
      		while ((r = a % b) != 0) {
      			a = b;
      			b = r;
      		}
      
      		return b;
      	}
      
      	unsigned long gcd1(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd2(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	unsigned long gcd3(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      		if (b == 1)
      			return r & -r;
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == 1)
      				return r & -r;
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd4(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      		if (b == r)
      			return r;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == r)
      				return r;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
      		gcd0, gcd1, gcd2, gcd3, gcd4,
      	};
      
      	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))
      
      	#if defined(__x86_64__)
      
      	#define rdtscll(val) do { \
      		unsigned long __a,__d; \
      		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
      		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
      	} while(0)
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		unsigned long long start, end;
      		unsigned long long ret;
      		unsigned long gcd_res;
      
      		rdtscll(start);
      		gcd_res = gcd(a, b);
      		rdtscll(end);
      
      		if (end >= start)
      			ret = end - start;
      		else
      			ret = ~0ULL - start + 1 + end;
      
      		*res = gcd_res;
      		return ret;
      	}
      
      	#else
      
      	static inline struct timespec read_time(void)
      	{
      		struct timespec time;
      		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
      		return time;
      	}
      
      	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
      	{
      		struct timespec temp;
      
      		if ((end.tv_nsec - start.tv_nsec) < 0) {
      			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
      			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
      		} else {
      			temp.tv_sec = end.tv_sec - start.tv_sec;
      			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
      		}
      
      		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
      	}
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		struct timespec start, end;
      		unsigned long gcd_res;
      
      		start = read_time();
      		gcd_res = gcd(a, b);
      		end = read_time();
      
      		*res = gcd_res;
      		return diff_time(start, end);
      	}
      
      	#endif
      
      	static inline unsigned long get_rand()
      	{
      		if (sizeof(long) == 8)
      			return (unsigned long)rand() << 32 | rand();
      		else
      			return rand();
      	}
      
      	int main(int argc, char **argv)
      	{
      		unsigned int seed = time(0);
      		int loops = 100;
      		int repeats = 1000;
      		unsigned long (*res)[TEST_ENTRIES];
      		unsigned long long elapsed[TEST_ENTRIES];
      		int i, j, k;
      
      		for (;;) {
      			int opt = getopt(argc, argv, "n:r:s:");
      			/* End condition always first */
      			if (opt == -1)
      				break;
      
      			switch (opt) {
      			case 'n':
      				loops = atoi(optarg);
      				break;
      			case 'r':
      				repeats = atoi(optarg);
      				break;
      			case 's':
      				seed = strtoul(optarg, NULL, 10);
      				break;
      			default:
      				/* You won't actually get here. */
      				break;
      			}
      		}
      
      		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
      		memset(elapsed, 0, sizeof(elapsed));
      
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			/* Do we have args? */
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			unsigned long long min_elapsed[TEST_ENTRIES];
      			for (k = 0; k < repeats; k++) {
      				for (i = 0; i < TEST_ENTRIES; i++) {
      					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
      					if (k == 0 || min_elapsed[i] > tmp)
      						min_elapsed[i] = tmp;
      				}
      			}
      			for (i = 0; i < TEST_ENTRIES; i++)
      				elapsed[i] += min_elapsed[i];
      		}
      
      		for (i = 0; i < TEST_ENTRIES; i++)
      			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);
      
      		k = 0;
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			for (i = 1; i < TEST_ENTRIES; i++) {
      				if (res[j][i] != res[j][0])
      					break;
      			}
      			if (i < TEST_ENTRIES) {
      				if (k == 0) {
      					k = 1;
      					fprintf(stderr, "Error:\n");
      				}
      				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
      				for (i = 0; i < TEST_ENTRIES; i++)
      					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
      			}
      		}
      
      		if (k == 0)
      			fprintf(stderr, "PASS\n");
      
      		free(res);
      
      		return 0;
      	}
      
      Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:
      
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 10174
        gcd1: elapsed 2120
        gcd2: elapsed 2902
        gcd3: elapsed 2039
        gcd4: elapsed 2812
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9309
        gcd1: elapsed 2280
        gcd2: elapsed 2822
        gcd3: elapsed 2217
        gcd4: elapsed 2710
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9589
        gcd1: elapsed 2098
        gcd2: elapsed 2815
        gcd3: elapsed 2030
        gcd4: elapsed 2718
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9914
        gcd1: elapsed 2309
        gcd2: elapsed 2779
        gcd3: elapsed 2228
        gcd4: elapsed 2709
        PASS
      
      [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
      Signed-off-by: NZhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
      Signed-off-by: NGeorge Spelvin <linux@horizon.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      fff7fb0b
  2. 26 4月, 2016 1 次提交
  3. 21 1月, 2016 1 次提交
    • C
      dma-mapping: always provide the dma_map_ops based implementation · e1c7e324
      Christoph Hellwig 提交于
      Move the generic implementation to <linux/dma-mapping.h> now that all
      architectures support it and remove the HAVE_DMA_ATTR Kconfig symbol now
      that everyone supports them.
      
      [valentinrothberg@gmail.com: remove leftovers in Kconfig]
      Signed-off-by: NChristoph Hellwig <hch@lst.de>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
      Cc: Chris Metcalf <cmetcalf@ezchip.com>
      Cc: David Howells <dhowells@redhat.com>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
      Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
      Cc: Helge Deller <deller@gmx.de>
      Cc: James Hogan <james.hogan@imgtec.com>
      Cc: Jesper Nilsson <jesper.nilsson@axis.com>
      Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
      Cc: Ley Foon Tan <lftan@altera.com>
      Cc: Mark Salter <msalter@redhat.com>
      Cc: Mikael Starvik <starvik@axis.com>
      Cc: Steven Miao <realmz6@gmail.com>
      Cc: Vineet Gupta <vgupta@synopsys.com>
      Cc: Christian Borntraeger <borntraeger@de.ibm.com>
      Cc: Joerg Roedel <jroedel@suse.de>
      Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
      Signed-off-by: NValentin Rothberg <valentinrothberg@gmail.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      e1c7e324
  4. 31 8月, 2015 1 次提交
    • M
      alpha: select CONFIG_ARCH_USE_CMPXCHG_LOCKREF. · 8f8dcb3f
      Matt Turner 提交于
      On Alpha we have spinlocks that are 32b in size and an efficient
      cmpxchg64 implementation, so we qualify to make use of cmpxchg backed
      lockrefs. Select the ARCH_USE_CMPXCHG_LOCKREF Kconfig symbol and provide
      a trivial implementation of arch_spin_value_unlocked to satisfy the
      lockref code.
      
      Using Linus' simple testcase from
      http://article.gmane.org/gmane.linux.file-systems/77466 on a dual CPU
      ES47 system I see around an 8% gain:
      
          N       Min       Max    Median       Avg      Stddev
      x  30   6194580   6295654   6272504   6272514   17694.232
      +  30   6731164   6786334   6767982d   6764274   13738.863
      Difference at 95.0% confidence
              491760 +/- 8188.17
              7.83992% +/- 0.130541%
              (Student's t, pooled s = 15840.5)
      Signed-off-by: NMatt Turner <mattst88@gmail.com>
      8f8dcb3f
  5. 15 4月, 2015 1 次提交
    • K
      alpha: expose number of page table levels on Kconfig level · f5d4547a
      Kirill A. Shutemov 提交于
      I've implemented accounting for pmd page tables as we have for pte (see
      mm->nr_ptes).  It's requires a new counter in mm_struct: mm->nr_pmds.
      
      But the feature doesn't make any sense if an architecture has PMD level
      folded and it would be nice get rid of the counter in this case.
      
      The problem is that we cannot use __PAGETABLE_PMD_FOLDED in
      <linux/mm_types.h> due to circular dependencies:
      
      <linux/mm_types> -> <asm/pgtable.h> -> <linux/mm_types.h>
      
      In most cases <asm/pgtable.h> wants <linux/mm_types.h> to get definition
      of struct page and struct vm_area_struct.  I've tried to split mm_struct
      into separate header file to be able to user <asm/pgtable.h> there.
      
      But it doesn't fly on some architectures, like ARM: it wants mm_struct
      <asm/pgtable.h> to implement tlb flushing.  I don't see how to fix it
      without massive de-inlining or coverting a lot for inline functions to
      macros.
      
      This is other approach: expose number of page tables in use via Kconfig
      and use it in <linux/mm_types.h> instead of __PAGETABLE_PMD_FOLDED from
      <asm/pgtable.h>.
      
      This patch (of 19):
      
      We would want to use number of page table level to define mm_struct.
      Let's expose it as CONFIG_PGTABLE_LEVELS.
      Signed-off-by: NKirill A. Shutemov <kirill.shutemov@linux.intel.com>
      Acked-by: NRichard Henderson <rth@twiddle.net>
      Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
      Cc: Matt Turner <mattst88@gmail.com>
      Tested-by: NGuenter Roeck <linux@roeck-us.net>
      Cc: "David S. Miller" <davem@davemloft.net>
      Cc: "H. Peter Anvin" <hpa@zytor.com>
      Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Catalin Marinas <catalin.marinas@arm.com>
      Cc: Chris Metcalf <cmetcalf@ezchip.com>
      Cc: David Howells <dhowells@redhat.com>
      Cc: Fenghua Yu <fenghua.yu@intel.com>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Helge Deller <deller@gmx.de>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Jeff Dike <jdike@addtoit.com>
      Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
      Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Cc: Richard Weinberger <richard@nod.at>
      Cc: Russell King <linux@arm.linux.org.uk>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Tony Luck <tony.luck@intel.com>
      Cc: Will Deacon <will.deacon@arm.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      f5d4547a
  6. 20 3月, 2014 2 次提交
  7. 01 2月, 2014 1 次提交
  8. 24 1月, 2014 1 次提交
  9. 03 1月, 2014 1 次提交
  10. 17 11月, 2013 5 次提交
    • R
      alpha: Switch to GENERIC_CLOCKEVENTS · a1659d6d
      Richard Henderson 提交于
      This allows us to get rid of some hacky code for SMP.  Get rid of
      some cycle counter hackery that's now handled by generic code via
      clocksource + clock_event_device objects.
      Signed-off-by: NRichard Henderson <rth@twiddle.net>
      a1659d6d
    • R
      alpha: Reorganize rtc handling · 85d0b3a5
      Richard Henderson 提交于
      Discontinue use of GENERIC_CMOS_UPDATE; rely on the RTC subsystem.
      
      The marvel platform requires that the rtc only be touched from the
      boot cpu.  This had been partially implemented with hooks for
      get/set_rtc_time, but read/update_persistent_clock were not handled.
      Move the hooks from the machine_vec to a special rtc_class_ops struct.
      
      We had read_persistent_clock managing the epoch against which the
      rtc hw is based, but this didn't apply to get_rtc_time or set_rtc_time.
      This resulted in incorrect values when hwclock(8) gets involved.
      
      Allow the epoch to be set from the kernel command-line, overriding
      the autodetection, which is doomed to fail in 2020.  Further, by
      implementing the rtc ioctl function, we can expose this epoch to
      userland.
      
      Elide the alarm functions that RTC_DRV_CMOS implements.  This was
      highly questionable on Alpha, since the interrupt is used by the
      system timer.
      Signed-off-by: NRichard Henderson <rth@twiddle.net>
      85d0b3a5
    • R
      alpha: Primitive support for CPU power down. · 7f3bbb82
      Richard Henderson 提交于
      Use WTINT to wait for the next interrupt.  Squash the WTINT call
      if the PALcode doesn't support it (e.g. MILO).  No attempt is yet
      made to skip clock ticks during normal scheduling in order to stay
      in power down mode longer.
      Signed-off-by: NRichard Henderson <rth@twiddle.net>
      7f3bbb82
    • R
      alpha: Allow HZ to be configured · fddd87d6
      Richard Henderson 提交于
      With the 1024Hz default, we spend 50% of QEMU emulation
      processing timer interrupts.
      Signed-off-by: NRichard Henderson <rth@twiddle.net>
      fddd87d6
    • R
      alpha: Notice if we're being run under QEMU · 994dcf70
      Richard Henderson 提交于
      When building a generic kernel, do a run-time check on the serial
      number, like we do for MILO.  When building a custom kernel, make
      this a configure-time check.
      Signed-off-by: NRichard Henderson <rth@twiddle.net>
      994dcf70
  11. 15 11月, 2013 1 次提交
  12. 24 10月, 2013 1 次提交
  13. 13 9月, 2013 1 次提交
  14. 20 7月, 2013 1 次提交
  15. 17 4月, 2013 1 次提交
  16. 16 4月, 2013 1 次提交
  17. 08 4月, 2013 1 次提交
  18. 13 3月, 2013 1 次提交
  19. 04 3月, 2013 1 次提交
  20. 28 2月, 2013 1 次提交
  21. 14 2月, 2013 1 次提交
    • A
      burying unused conditionals · d64008a8
      Al Viro 提交于
      __ARCH_WANT_SYS_RT_SIGACTION,
      __ARCH_WANT_SYS_RT_SIGSUSPEND,
      __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND,
      __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL - not used anymore
      CONFIG_GENERIC_{SIGALTSTACK,COMPAT_RT_SIG{ACTION,QUEUEINFO,PENDING,PROCMASK}} -
      can be assumed always set.
      d64008a8
  22. 04 2月, 2013 2 次提交
  23. 19 1月, 2013 1 次提交
    • J
      tty: Added a CONFIG_TTY option to allow removal of TTY · 4f73bc4d
      Joe Millenbach 提交于
      The option allows you to remove TTY and compile without errors. This
      saves space on systems that won't support TTY interfaces anyway.
      bloat-o-meter output is below.
      
      The bulk of this patch consists of Kconfig changes adding "depends on
      TTY" to various serial devices and similar drivers that require the TTY
      layer.  Ideally, these dependencies would occur on a common intermediate
      symbol such as SERIO, but most drivers "select SERIO" rather than
      "depends on SERIO", and "select" does not respect dependencies.
      
      bloat-o-meter output comparing our previous minimal to new minimal by
      removing TTY.  The list is filtered to not show removed entries with awk
      '$3 != "-"' as the list was very long.
      
      add/remove: 0/226 grow/shrink: 2/14 up/down: 6/-35356 (-35350)
      function                                     old     new   delta
      chr_dev_init                                 166     170      +4
      allow_signal                                  80      82      +2
      static.__warned                              143     142      -1
      disallow_signal                               63      62      -1
      __set_special_pids                            95      94      -1
      unregister_console                           126     121      -5
      start_kernel                                 546     541      -5
      register_console                             593     588      -5
      copy_from_user                                45      40      -5
      sys_setsid                                   128     120      -8
      sys_vhangup                                   32      19     -13
      do_exit                                     1543    1526     -17
      bitmap_zero                                   60      40     -20
      arch_local_irq_save                          137     117     -20
      release_task                                 674     652     -22
      static.spin_unlock_irqrestore                308     260     -48
      Signed-off-by: NJoe Millenbach <jmillenbach@gmail.com>
      Reviewed-by: NJamey Sharp <jamey@minilop.net>
      Reviewed-by: NJosh Triplett <josh@joshtriplett.org>
      Signed-off-by: NGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      4f73bc4d
  24. 12 1月, 2013 1 次提交
    • K
      arch/alpha: remove depends on CONFIG_EXPERIMENTAL · 88d11bb9
      Kees Cook 提交于
      The CONFIG_EXPERIMENTAL config item has not carried much meaning for a
      while now and is almost always enabled by default. As agreed during the
      Linux kernel summit, remove it from any "depends on" lines in Kconfigs.
      
      CC: Richard Henderson <rth@twiddle.net>
      CC: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
      CC: Thomas Gleixner <tglx@linutronix.de>
      CC: "Michael S. Tsirkin" <mst@redhat.com>
      CC: Anna-Maria Gleixner <anna-maria@glx-um.de>
      CC: Andrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NKees Cook <keescook@chromium.org>
      Acked-by: NMatt Turner <mattst88@gmail.com>
      88d11bb9
  25. 20 12月, 2012 2 次提交
  26. 18 11月, 2012 1 次提交
  27. 13 10月, 2012 1 次提交
  28. 01 10月, 2012 1 次提交
  29. 28 9月, 2012 1 次提交
    • D
      Make most arch asm/module.h files use asm-generic/module.h · 786d35d4
      David Howells 提交于
      Use the mapping of Elf_[SPE]hdr, Elf_Addr, Elf_Sym, Elf_Dyn, Elf_Rel/Rela,
      ELF_R_TYPE() and ELF_R_SYM() to either the 32-bit version or the 64-bit version
      into asm-generic/module.h for all arches bar MIPS.
      
      Also, use the generic definition mod_arch_specific where possible.
      
      To this end, I've defined three new config bools:
      
       (*) HAVE_MOD_ARCH_SPECIFIC
      
           Arches define this if they don't want to use the empty generic
           mod_arch_specific struct.
      
       (*) MODULES_USE_ELF_RELA
      
           Arches define this if their modules can contain RELA records.  This causes
           the Elf_Rela mapping to be emitted and allows apply_relocate_add() to be
           defined by the arch rather than have the core emit an error message.
      
       (*) MODULES_USE_ELF_REL
      
           Arches define this if their modules can contain REL records.  This causes
           the Elf_Rel mapping to be emitted and allows apply_relocate() to be
           defined by the arch rather than have the core emit an error message.
      
      Note that it is possible to allow both REL and RELA records: m68k and mips are
      two arches that do this.
      
      With this, some arch asm/module.h files can be deleted entirely and replaced
      with a generic-y marker in the arch Kbuild file.
      
      Additionally, I have removed the bits from m32r and score that handle the
      unsupported type of relocation record as that's now handled centrally.
      Signed-off-by: NDavid Howells <dhowells@redhat.com>
      Acked-by: NSam Ravnborg <sam@ravnborg.org>
      Signed-off-by: NRusty Russell <rusty@rustcorp.com.au>
      786d35d4
  30. 19 8月, 2012 1 次提交
    • M
      alpha: Use new generic strncpy_from_user() and strnlen_user() · f2db633d
      Michael Cree 提交于
      Similar to x86/sparc/powerpc implementations except:
      1) we implement an extremely efficient has_zero()/find_zero()
         sequence with both prep_zero_mask() and create_zero_mask()
         no-operations.
      2) Our output from prep_zero_mask() differs in that only the
         lowest eight bits are used to represent the zero bytes
         nevertheless it can be safely ORed with other similar masks
         from prep_zero_mask() and forms input to create_zero_mask(),
         the two fundamental properties prep_zero_mask() must satisfy.
      
      Tests on EV67 and EV68 CPUs revealed that the generic code is
      essentially as fast (to within 0.5% of CPU cycles) of the old
      Alpha specific code for large quadword-aligned strings, despite
      the 30% extra CPU instructions executed.  In contrast, the
      generic code for unaligned strings is substantially slower (by
      more than a factor of 3) than the old Alpha specific code.
      Signed-off-by: NMichael Cree <mcree@orcon.net.nz>
      Acked-by: NMatt Turner <mattst88@gmail.com>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      f2db633d
  31. 31 7月, 2012 1 次提交
  32. 21 5月, 2012 1 次提交
  33. 05 5月, 2012 1 次提交