1. 21 5月, 2016 1 次提交
    • Z
      lib/GCD.c: use binary GCD algorithm instead of Euclidean · fff7fb0b
      Zhaoxiu Zeng 提交于
      The binary GCD algorithm is based on the following facts:
      	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
      	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
      	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)
      
      Even on x86 machines with reasonable division hardware, the binary
      algorithm runs about 25% faster (80% the execution time) than the
      division-based Euclidian algorithm.
      
      On platforms like Alpha and ARMv6 where division is a function call to
      emulation code, it's even more significant.
      
      There are two variants of the code here, depending on whether a fast
      __ffs (find least significant set bit) instruction is available.  This
      allows the unpredictable branches in the bit-at-a-time shifting loop to
      be eliminated.
      
      If fast __ffs is not available, the "even/odd" GCD variant is used.
      
      I use the following code to benchmark:
      
      	#include <stdio.h>
      	#include <stdlib.h>
      	#include <stdint.h>
      	#include <string.h>
      	#include <time.h>
      	#include <unistd.h>
      
      	#define swap(a, b) \
      		do { \
      			a ^= b; \
      			b ^= a; \
      			a ^= b; \
      		} while (0)
      
      	unsigned long gcd0(unsigned long a, unsigned long b)
      	{
      		unsigned long r;
      
      		if (a < b) {
      			swap(a, b);
      		}
      
      		if (b == 0)
      			return a;
      
      		while ((r = a % b) != 0) {
      			a = b;
      			b = r;
      		}
      
      		return b;
      	}
      
      	unsigned long gcd1(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd2(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	unsigned long gcd3(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		b >>= __builtin_ctzl(b);
      		if (b == 1)
      			return r & -r;
      
      		for (;;) {
      			a >>= __builtin_ctzl(a);
      			if (a == 1)
      				return r & -r;
      			if (a == b)
      				return a << __builtin_ctzl(r);
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      		}
      	}
      
      	unsigned long gcd4(unsigned long a, unsigned long b)
      	{
      		unsigned long r = a | b;
      
      		if (!a || !b)
      			return r;
      
      		r &= -r;
      
      		while (!(b & r))
      			b >>= 1;
      		if (b == r)
      			return r;
      
      		for (;;) {
      			while (!(a & r))
      				a >>= 1;
      			if (a == r)
      				return r;
      			if (a == b)
      				return a;
      
      			if (a < b)
      				swap(a, b);
      			a -= b;
      			a >>= 1;
      			if (a & r)
      				a += b;
      			a >>= 1;
      		}
      	}
      
      	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
      		gcd0, gcd1, gcd2, gcd3, gcd4,
      	};
      
      	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))
      
      	#if defined(__x86_64__)
      
      	#define rdtscll(val) do { \
      		unsigned long __a,__d; \
      		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
      		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
      	} while(0)
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		unsigned long long start, end;
      		unsigned long long ret;
      		unsigned long gcd_res;
      
      		rdtscll(start);
      		gcd_res = gcd(a, b);
      		rdtscll(end);
      
      		if (end >= start)
      			ret = end - start;
      		else
      			ret = ~0ULL - start + 1 + end;
      
      		*res = gcd_res;
      		return ret;
      	}
      
      	#else
      
      	static inline struct timespec read_time(void)
      	{
      		struct timespec time;
      		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
      		return time;
      	}
      
      	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
      	{
      		struct timespec temp;
      
      		if ((end.tv_nsec - start.tv_nsec) < 0) {
      			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
      			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
      		} else {
      			temp.tv_sec = end.tv_sec - start.tv_sec;
      			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
      		}
      
      		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
      	}
      
      	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
      								unsigned long a, unsigned long b, unsigned long *res)
      	{
      		struct timespec start, end;
      		unsigned long gcd_res;
      
      		start = read_time();
      		gcd_res = gcd(a, b);
      		end = read_time();
      
      		*res = gcd_res;
      		return diff_time(start, end);
      	}
      
      	#endif
      
      	static inline unsigned long get_rand()
      	{
      		if (sizeof(long) == 8)
      			return (unsigned long)rand() << 32 | rand();
      		else
      			return rand();
      	}
      
      	int main(int argc, char **argv)
      	{
      		unsigned int seed = time(0);
      		int loops = 100;
      		int repeats = 1000;
      		unsigned long (*res)[TEST_ENTRIES];
      		unsigned long long elapsed[TEST_ENTRIES];
      		int i, j, k;
      
      		for (;;) {
      			int opt = getopt(argc, argv, "n:r:s:");
      			/* End condition always first */
      			if (opt == -1)
      				break;
      
      			switch (opt) {
      			case 'n':
      				loops = atoi(optarg);
      				break;
      			case 'r':
      				repeats = atoi(optarg);
      				break;
      			case 's':
      				seed = strtoul(optarg, NULL, 10);
      				break;
      			default:
      				/* You won't actually get here. */
      				break;
      			}
      		}
      
      		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
      		memset(elapsed, 0, sizeof(elapsed));
      
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			/* Do we have args? */
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			unsigned long long min_elapsed[TEST_ENTRIES];
      			for (k = 0; k < repeats; k++) {
      				for (i = 0; i < TEST_ENTRIES; i++) {
      					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
      					if (k == 0 || min_elapsed[i] > tmp)
      						min_elapsed[i] = tmp;
      				}
      			}
      			for (i = 0; i < TEST_ENTRIES; i++)
      				elapsed[i] += min_elapsed[i];
      		}
      
      		for (i = 0; i < TEST_ENTRIES; i++)
      			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);
      
      		k = 0;
      		srand(seed);
      		for (j = 0; j < loops; j++) {
      			unsigned long a = get_rand();
      			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
      			for (i = 1; i < TEST_ENTRIES; i++) {
      				if (res[j][i] != res[j][0])
      					break;
      			}
      			if (i < TEST_ENTRIES) {
      				if (k == 0) {
      					k = 1;
      					fprintf(stderr, "Error:\n");
      				}
      				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
      				for (i = 0; i < TEST_ENTRIES; i++)
      					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
      			}
      		}
      
      		if (k == 0)
      			fprintf(stderr, "PASS\n");
      
      		free(res);
      
      		return 0;
      	}
      
      Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:
      
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 10174
        gcd1: elapsed 2120
        gcd2: elapsed 2902
        gcd3: elapsed 2039
        gcd4: elapsed 2812
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9309
        gcd1: elapsed 2280
        gcd2: elapsed 2822
        gcd3: elapsed 2217
        gcd4: elapsed 2710
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9589
        gcd1: elapsed 2098
        gcd2: elapsed 2815
        gcd3: elapsed 2030
        gcd4: elapsed 2718
        PASS
        zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
        gcd0: elapsed 9914
        gcd1: elapsed 2309
        gcd2: elapsed 2779
        gcd3: elapsed 2228
        gcd4: elapsed 2709
        PASS
      
      [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
      Signed-off-by: NZhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
      Signed-off-by: NGeorge Spelvin <linux@horizon.com>
      Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
      fff7fb0b
  2. 13 5月, 2016 9 次提交
    • J
      MIPS: Add probing & defs for VZ & guest features · 6ad816e7
      James Hogan 提交于
      Add a few new cpu-features.h definitions for VZ sub-features, namely the
      existence of the CP0_GuestCtl0Ext, CP0_GuestCtl1, and CP0_GuestCtl2
      registers, and support for GuestID to dialias TLB entries belonging to
      different guests.
      
      Also add certain features present in the guest, with the naming scheme
      cpu_guest_has_*. These are added separately to the main options bitfield
      since they generally parallel similar features in the root context. A
      few of these (FPU, MSA, watchpoints, perf counters, CP0_[X]ContextConfig
      registers, MAAR registers, and probably others in future) can be
      dynamically configured in the guest context, for which the
      cpu_guest_has_dyn_* macros are added.
      
      [ralf@linux-mips.org: Resolve merge conflict.]
      Signed-off-by: NJames Hogan <james.hogan@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Patchwork: https://patchwork.linux-mips.org/patch/13231/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      6ad816e7
    • J
      MIPS: Add perf counter feature · 30228c40
      James Hogan 提交于
      Add CPU feature for standard MIPS r2 performance counters, as determined
      by the Config1.PC bit. Both perf_events and oprofile probe this bit, so
      lets combine the probing and change both to use cpu_has_perf.
      
      This will also be used for VZ support in KVM to know whether performance
      counters exist which can be exposed to guests.
      
      [ralf@linux-mips.org: resolve conflict.]
      Signed-off-by: NJames Hogan <james.hogan@imgtec.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Ingo Molnar <mingo@redhat.com>
      Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Robert Richter <rric@kernel.org>
      Cc: linux-mips@linux-mips.org
      Cc: oprofile-list@lists.sf.net
      Patchwork: https://patchwork.linux-mips.org/patch/13226/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      30228c40
    • J
      MIPS: Add defs & probing of [X]ContextConfig · f18bdfa1
      James Hogan 提交于
      The CP0_[X]ContextConfig registers are present if CP0_Config3.CTXTC or
      CP0_Config3.SM are set, and provide more control over which bits of
      CP0_[X]Context are set to the faulting virtual address on a TLB
      exception.
      
      KVM/VZ will need to be able to save and restore these registers in the
      guest context, so add the relevant definitions and probing of the
      ContextConfig feature in the root context first.
      
      [ralf@linux-mips.org: resolve merge conflict.]
      Signed-off-by: NJames Hogan <james.hogan@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Patchwork: https://patchwork.linux-mips.org/patch/13225/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      f18bdfa1
    • J
      MIPS: Add defs & probing of BadInstr[P] registers · e06a1548
      James Hogan 提交于
      The optional CP0_BadInstr and CP0_BadInstrP registers are written with
      the encoding of the instruction that caused a synchronous exception to
      occur, and the prior branch instruction if in a delay slot.
      
      These will be useful for instruction emulation in KVM, and especially
      for VZ support where reading guest virtual memory is a bit more awkward.
      
      Add CPU option numbers and cpu_has_* definitions to indicate the
      presence of each registers, and add code to probe for them using bits in
      the CP0_Config3 register.
      
      [ralf@linux-mips.org: resolve merge conflict.]
      Signed-off-by: NJames Hogan <james.hogan@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Patchwork: https://patchwork.linux-mips.org/patch/13224/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      e06a1548
    • J
      MIPS: Add defs & probing of extended CP0_EBase · 37fb60f8
      James Hogan 提交于
      The CP0_EBase register may optionally have a write gate (WG) bit to
      allow the upper bits to be written, i.e. bits 31:30 on MIPS32 since r3
      (to allow for an exception base outside of KSeg0/KSeg1 when segmentation
      control is in use) and bits 63:30 on MIPS64 (which also implies the
      extension of CP0_EBase to 64 bits long).
      
      The presence of this feature will need to be known about for VZ support
      in order to correctly save and restore all the bits of the guest
      CP0_EBase register, so add CPU feature definition and probing for this
      feature.
      
      Probing the WG bit on MIPS64 can be a bit fiddly, since 64-bit COP0
      register access instructions were UNDEFINED for 32-bit registers prior
      to MIPS r6, and it'd be nice to be able to probe without clobbering the
      existing state, so there are 3 potential paths:
      
      - If we do a 32-bit read of CP0_EBase and the WG bit is already set, the
        register must be 64-bit.
      
      - On MIPS r6 we can do a 64-bit read-modify-write to set CP0_EBase.WG,
        since the upper bits will read 0 and be ignored on write if the
        register is 32-bit.
      
      - On pre-r6 cores, we do a 32-bit read-modify-write of CP0_EBase. This
        avoids the potentially UNDEFINED behaviour, but will clobber the upper
        32-bits of CP0_EBase if it isn't a simple sign extension (which also
        requires us to ensure BEV=1 or modifying the exception base would be
        UNDEFINED too). It is hopefully unlikely a bootloader would set up
        CP0_EBase to a 64-bit segment and leave WG=0.
      
      [ralf@linux-mips.org: Resolved merge conflict.]
      Signed-off-by: NJames Hogan <james.hogan@imgtec.com>
      Tested-by: NMatt Redfearn <matt.redfearn@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Patchwork: https://patchwork.linux-mips.org/patch/13223/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      37fb60f8
    • J
      MIPS: Separate XPA CPU feature into LPA and MVH · 12822570
      James Hogan 提交于
      XPA (eXtended Physical Addressing) should be detected as a combination
      of two architectural features:
      - Large Physical Address (as per Config3.LPA). With XPA this will be set
        on MIPS32r5 cores, but it may also be set for MIPS64r2 cores too.
      - MTHC0/MFHC0 instructions (as per Config5.MVH). With XPA this will be
        set, but it may also be set in VZ guest context even when Config3.LPA
        in the guest context has been cleared by the hypervisor.
      
      As such, XPA is only usable if both bits are set. Update CPU features to
      separate these two features, with cpu_has_xpa requiring both to be set.
      Signed-off-by: NJames Hogan <james.hogan@imgtec.com>
      Cc: Paul Burton <paul.burton@imgtec.com>
      Cc: Maciej W. Rozycki <macro@imgtec.com>
      Cc: Joshua Kinard <kumba@gentoo.org>
      Cc: linux-mips@linux-mips.org
      Cc: linux-kernel@vger.kernel.org
      Patchwork: https://patchwork.linux-mips.org/patch/13112/Signed-off-by: NPaul Burton <paul.burton@imgtec.com>
      Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      12822570
    • H
      MIPS: Loongson-3: Fast TLB refill handler · 380cd582
      Huacai Chen 提交于
      Loongson-3A R2 has pwbase/pwfield/pwsize/pwctl registers in CP0 (this
      is very similar to HTW) and lwdir/lwpte/lddir/ldpte instructions which
      can be used for fast TLB refill.
      
      [ralf@linux-mips.org: Resolve conflict.]
      Signed-off-by: NHuacai Chen <chenhc@lemote.com>
      Cc: Aurelien Jarno <aurelien@aurel32.net>
      Cc: Steven J . Hill <sjhill@realitydiluted.com>
      Cc: Fuxin Zhang <zhangfx@lemote.com>
      Cc: Zhangjin Wu <wuzhangjin@gmail.com>
      Cc: linux-mips@linux-mips.org
      Patchwork: https://patchwork.linux-mips.org/patch/12754/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      380cd582
    • Z
      MIPS: Detect DSP v3 support · b5a6455c
      Zubair Lutfullah Kakakhel 提交于
      DSPv3 is supported on all MIPSr6 systems which indicate support for DSPv2.
      
      This doesn't require any changes to the kernel's handling of DSP
      resources. The patch is to detect support and indicate it in /proc/cpuinfo
      
      DSP v3 introduces a new instruction BPOSGE32C
      Signed-off-by: NZubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
      Reviewed-by: NPaul Burton <paul.burton@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Cc: linux-kernel@vger.kernel.org
      Patchwork: https://patchwork.linux-mips.org/patch/12918/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      b5a6455c
    • P
      MIPS: Detect MIPSr6 Virtual Processor support · f270d881
      Paul Burton 提交于
      MIPSr6 introduces support for "Virtual Processors", which are
      conceptually similar to VPEs from the now-deprecated MT ASE. Detect
      whether the system supports VPs using the VP bit in Config5, adding
      cpu_has_vp for use by later patches.
      Signed-off-by: NPaul Burton <paul.burton@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Cc: Maciej W. Rozycki <macro@imgtec.com>
      Cc: Joshua Kinard <kumba@gentoo.org>
      Cc: Steven J. Hill <sjhill@realitydiluted.com>
      Cc: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
      Cc: James Hogan <james.hogan@imgtec.com>
      Cc: linux-kernel@vger.kernel.org
      Patchwork: https://patchwork.linux-mips.org/patch/12327/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      f270d881
  3. 20 1月, 2016 1 次提交
  4. 11 11月, 2015 1 次提交
    • P
      MIPS: Allow RIXI for 32-bit kernels on MIPS64 · 033549c6
      Paul Burton 提交于
      Commit a68d09a1 ("MIPS: Don't use RI/XI with 32-bit kernels on
      64-bit CPUs") prevented use of RIXI on MIPS64 systems, stating that the
      "TLB handlers cannot handle this case". What they actually couldn't
      handle was cases where there were less fill bits in the Entry{Lo,Hi}
      registers than bits used by software in PTEs. The handlers can now deal
      with this case, so enable RIXI for MIPS32 kernels on MIPS64 systems.
      
      Note that beyond the obvious benefits provided by having RIXI on such
      systems, this is required for systems implementing MIPSr6 where RIXI
      cannot be disabled.
      
      This reverts commit a68d09a1 ("MIPS: Don't use RI/XI with 32-bit
      kernels on 64-bit CPUs").
      Signed-off-by: NPaul Burton <paul.burton@imgtec.com>
      Cc: linux-mips@linux-mips.org
      Cc: Joshua Kinard <kumba@gentoo.org>
      Cc: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
      Cc: Maciej W. Rozycki <macro@linux-mips.org>
      Cc: linux-kernel@vger.kernel.org
      Cc: James Hogan <james.hogan@imgtec.com>
      Cc: Markos Chandras <markos.chandras@imgtec.com>
      Patchwork: https://patchwork.linux-mips.org/patch/11219/Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      033549c6
  5. 23 9月, 2015 1 次提交
  6. 03 9月, 2015 1 次提交
  7. 22 6月, 2015 1 次提交
    • J
      MIPS: R12000: Enable branch prediction global history · 8d5ded16
      Joshua Kinard 提交于
      The R12000 added a new feature to enhance branch prediction called
      "global history".  Per the Vr10000 Series User Manual (U10278EJ4V0UM),
      Coprocessor 0, Diagnostic Register (22):
      
      """
      If bit 26 is set, branch prediction uses all eight bits of the global
      history register.  If bit 26 is not set, then bits 25:23 specify a count
      of the number of bits of global history to be used. Thus if bits 26:23
      are all zero, global history is disabled.
      
      The global history contains a record of the taken/not-taken status of
      recently executed branches, and when used is XOR'ed with the PC of a
      branch being predicted to produce a hashed value for indexing the BPT.
      Some programs with small "working set of conditional branches" benefit
      significantly from the use of such hashing, some see slight performance
      degradation.
      """
      
      This patch enables global history on R12000 CPUs and up by setting bit
      26 in the branch prediction diagnostic register (CP0 $22) to '1'.  Bits
      25:23 are left alone so that all eight bits of the global history
      register are available for branch prediction.
      Signed-off-by: NJoshua Kinard <kumba@gentoo.org>
      Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      8d5ded16
  8. 10 4月, 2015 1 次提交
  9. 08 4月, 2015 2 次提交
  10. 31 3月, 2015 1 次提交
  11. 20 3月, 2015 1 次提交
  12. 17 2月, 2015 4 次提交
  13. 24 11月, 2014 1 次提交
  14. 22 9月, 2014 1 次提交
  15. 02 8月, 2014 3 次提交
  16. 31 5月, 2014 1 次提交
  17. 21 5月, 2014 1 次提交
  18. 27 3月, 2014 2 次提交
  19. 23 1月, 2014 2 次提交
  20. 24 9月, 2013 1 次提交
  21. 18 9月, 2013 1 次提交
  22. 05 8月, 2013 1 次提交
    • R
      MIPS: oprofile: Fix BUG due to smp_processor_id() in preemptible code. · cf5b2d23
      Ralf Baechle 提交于
      current_cpu_type() is not preemption-safe.
      If CONFIG_PREEMPT is enabled then mipsxx_reg_setup() can be called from preemptible state.
      Added get_cpu()/put_cpu() pair to make it preemption-safe.
      
      This was found while testing oprofile with CONFIG_DEBUG_PREEMPT enable.
      
      /usr/zntestsuite # opcontrol --init
      /usr/zntestsuite # opcontrol --setup --event=L2_CACHE_ACCESSES:500 --event=L2_CACHE_MISSES:500 --no-vmlinux
      /usr/zntestsuite # opcontrol --start
      Using 2.6+ OProfile kernel interface.
      BUG: using smp_processor_id() in preemptible [00000000] code: oprofiled/1362
      caller is mipsxx_reg_setup+0x11c/0x164
      CPU: 0 PID: 1362 Comm: oprofiled Not tainted 3.10.4 #18
      Stack : 00000006 70757465 00000000 00000000 00000000 00000000 80b173f6 00000037
                80b10000 00000000 80b21614 88f5a220 00000000 00000000 00000000 00000000
                00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
                00000000 00000000 00000000 89c49c00 89c49c2c 80721254 807b7927 8012c1d0
                80b10000 80721254 00000000 00000552 88f5a220 80b1335c 807b78e6 89c49ba8
                ...
      Call Trace:
      [<801099a4>] show_stack+0x64/0x7c
      [<80665520>] dump_stack+0x20/0x2c
      [<803a2250>] debug_smp_processor_id+0xe0/0xf0
      [<8052df24>] mipsxx_reg_setup+0x11c/0x164
      [<8052cd70>] op_mips_setup+0x24/0x4c
      [<80529cfc>] oprofile_setup+0x5c/0x12c
      [<8052b9f8>] event_buffer_open+0x78/0xf8
      [<801c3150>] do_dentry_open.isra.15+0x2b8/0x3b0
      [<801c3270>] finish_open+0x28/0x4c
      [<801d49b8>] do_last.isra.41+0x2cc/0xd00
      [<801d54a0>] path_openat+0xb4/0x4c4
      [<801d5c44>] do_filp_open+0x3c/0xac
      [<801c4744>] do_sys_open+0x110/0x1f4
      [<8010f47c>] stack_done+0x20/0x44
      
      Bug reported and original patch by Jerin Jacob <jerinjacobk@gmail.com>.
      Signed-off-by: NRalf Baechle <ralf@linux-mips.org>
      Acked-by: NJerin Jacob <jerinjacobk@gmail.com>
      cf5b2d23
  23. 01 7月, 2013 2 次提交