提交 · fff7fb0b2d908dec779783d8eaf3d7725230f75e · openanolis / cloud-kernel

21 5月, 2016 2 次提交

lib/GCD.c: use binary GCD algorithm instead of Euclidean · fff7fb0b

由 Zhaoxiu Zeng 提交于 5月 20, 2016

The binary GCD algorithm is based on the following facts:
	1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
	3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)

Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% the execution time) than the
division-based Euclidian algorithm.

On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.

There are two variants of the code here, depending on whether a fast
__ffs (find least significant set bit) instruction is available.  This
allows the unpredictable branches in the bit-at-a-time shifting loop to
be eliminated.

If fast __ffs is not available, the "even/odd" GCD variant is used.

I use the following code to benchmark:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <string.h>
	#include <time.h>
	#include <unistd.h>

	#define swap(a, b) \
		do { \
			a ^= b; \
			b ^= a; \
			a ^= b; \
		} while (0)

	unsigned long gcd0(unsigned long a, unsigned long b)
	{
		unsigned long r;

		if (a < b) {
			swap(a, b);
		}

		if (b == 0)
			return a;

		while ((r = a % b) != 0) {
			a = b;
			b = r;
		}

		return b;
	}

	unsigned long gcd1(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		b >>= __builtin_ctzl(b);

		for (;;) {
			a >>= __builtin_ctzl(a);
			if (a == b)
				return a << __builtin_ctzl(r);

			if (a < b)
				swap(a, b);
			a -= b;
		}
	}

	unsigned long gcd2(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		r &= -r;

		while (!(b & r))
			b >>= 1;

		for (;;) {
			while (!(a & r))
				a >>= 1;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			if (a & r)
				a += b;
			a >>= 1;
		}
	}

	unsigned long gcd3(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		b >>= __builtin_ctzl(b);
		if (b == 1)
			return r & -r;

		for (;;) {
			a >>= __builtin_ctzl(a);
			if (a == 1)
				return r & -r;
			if (a == b)
				return a << __builtin_ctzl(r);

			if (a < b)
				swap(a, b);
			a -= b;
		}
	}

	unsigned long gcd4(unsigned long a, unsigned long b)
	{
		unsigned long r = a | b;

		if (!a || !b)
			return r;

		r &= -r;

		while (!(b & r))
			b >>= 1;
		if (b == r)
			return r;

		for (;;) {
			while (!(a & r))
				a >>= 1;
			if (a == r)
				return r;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			if (a & r)
				a += b;
			a >>= 1;
		}
	}

	static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
		gcd0, gcd1, gcd2, gcd3, gcd4,
	};

	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))

	#if defined(__x86_64__)

	#define rdtscll(val) do { \
		unsigned long __a,__d; \
		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
	} while(0)

	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
								unsigned long a, unsigned long b, unsigned long *res)
	{
		unsigned long long start, end;
		unsigned long long ret;
		unsigned long gcd_res;

		rdtscll(start);
		gcd_res = gcd(a, b);
		rdtscll(end);

		if (end >= start)
			ret = end - start;
		else
			ret = ~0ULL - start + 1 + end;

		*res = gcd_res;
		return ret;
	}

	#else

	static inline struct timespec read_time(void)
	{
		struct timespec time;
		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
		return time;
	}

	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
	{
		struct timespec temp;

		if ((end.tv_nsec - start.tv_nsec) < 0) {
			temp.tv_sec = end.tv_sec - start.tv_sec - 1;
			temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
		} else {
			temp.tv_sec = end.tv_sec - start.tv_sec;
			temp.tv_nsec = end.tv_nsec - start.tv_nsec;
		}

		return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
	}

	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
								unsigned long a, unsigned long b, unsigned long *res)
	{
		struct timespec start, end;
		unsigned long gcd_res;

		start = read_time();
		gcd_res = gcd(a, b);
		end = read_time();

		*res = gcd_res;
		return diff_time(start, end);
	}

	#endif

	static inline unsigned long get_rand()
	{
		if (sizeof(long) == 8)
			return (unsigned long)rand() << 32 | rand();
		else
			return rand();
	}

	int main(int argc, char **argv)
	{
		unsigned int seed = time(0);
		int loops = 100;
		int repeats = 1000;
		unsigned long (*res)[TEST_ENTRIES];
		unsigned long long elapsed[TEST_ENTRIES];
		int i, j, k;

		for (;;) {
			int opt = getopt(argc, argv, "n:r:s:");
			/* End condition always first */
			if (opt == -1)
				break;

			switch (opt) {
			case 'n':
				loops = atoi(optarg);
				break;
			case 'r':
				repeats = atoi(optarg);
				break;
			case 's':
				seed = strtoul(optarg, NULL, 10);
				break;
			default:
				/* You won't actually get here. */
				break;
			}
		}

		res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
		memset(elapsed, 0, sizeof(elapsed));

		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			/* Do we have args? */
			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
			unsigned long long min_elapsed[TEST_ENTRIES];
			for (k = 0; k < repeats; k++) {
				for (i = 0; i < TEST_ENTRIES; i++) {
					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
					if (k == 0 || min_elapsed[i] > tmp)
						min_elapsed[i] = tmp;
				}
			}
			for (i = 0; i < TEST_ENTRIES; i++)
				elapsed[i] += min_elapsed[i];
		}

		for (i = 0; i < TEST_ENTRIES; i++)
			printf("gcd%d: elapsed %llu\n", i, elapsed[i]);

		k = 0;
		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
			for (i = 1; i < TEST_ENTRIES; i++) {
				if (res[j][i] != res[j][0])
					break;
			}
			if (i < TEST_ENTRIES) {
				if (k == 0) {
					k = 1;
					fprintf(stderr, "Error:\n");
				}
				fprintf(stderr, "gcd(%lu, %lu): ", a, b);
				for (i = 0; i < TEST_ENTRIES; i++)
					fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
			}
		}

		if (k == 0)
			fprintf(stderr, "PASS\n");

		free(res);

		return 0;
	}

Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:

  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 10174
  gcd1: elapsed 2120
  gcd2: elapsed 2902
  gcd3: elapsed 2039
  gcd4: elapsed 2812
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9309
  gcd1: elapsed 2280
  gcd2: elapsed 2822
  gcd3: elapsed 2217
  gcd4: elapsed 2710
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9589
  gcd1: elapsed 2098
  gcd2: elapsed 2815
  gcd3: elapsed 2030
  gcd4: elapsed 2718
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9914
  gcd1: elapsed 2309
  gcd2: elapsed 2779
  gcd3: elapsed 2228
  gcd4: elapsed 2709
  PASS

[akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
Signed-off-by: NZhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Signed-off-by: NGeorge Spelvin <linux@horizon.com>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>

fff7fb0b

exit_thread: remove empty bodies · 5f56a5df

由 Jiri Slaby 提交于 5月 20, 2016

Define HAVE_EXIT_THREAD for archs which want to do something in
exit_thread. For others, let's define exit_thread as an empty inline.

This is a cleanup before we change the prototype of exit_thread to
accept a task parameter.

[akpm@linux-foundation.org: fix mips]
Signed-off-by: NJiri Slaby <jslaby@suse.cz>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Steven Miao <realmz6@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>

5f56a5df

20 5月, 2016 1 次提交

arch: fix has_transparent_hugepage() · fd8cfd30

由 Hugh Dickins 提交于 5月 19, 2016

I've just discovered that the useful-sounding has_transparent_hugepage()
is actually an architecture-dependent minefield: on some arches it only
builds if CONFIG_TRANSPARENT_HUGEPAGE=y, on others it's also there when
not, but on some of those (arm and arm64) it then gives the wrong
answer; and on mips alone it's marked __init, which would crash if
called later (but so far it has not been called later).

Straighten this out: make it available to all configs, with a sensible
default in asm-generic/pgtable.h, removing its definitions from those
arches (arc, arm, arm64, sparc, tile) which are served by the default,
adding #define has_transparent_hugepage has_transparent_hugepage to
those (mips, powerpc, s390, x86) which need to override the default at
runtime, and removing the __init from mips (but maybe that kind of code
should be avoided after init: set a static variable the first time it's
called).
Signed-off-by: NHugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Acked-by: NDavid S. Miller <davem@davemloft.net>
Acked-by: Vineet Gupta <vgupta@synopsys.com>		[arch/arc]
Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>	[arch/s390]
Acked-by: NIngo Molnar <mingo@kernel.org>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>

fd8cfd30

18 5月, 2016 1 次提交

arc: axs103_smp: Fix CPU frequency to 100MHz for dual-core · 776d7f16

由 Alexey Brodkin 提交于 5月 16, 2016

The most recent release of AXS103 [v1.1] is proven to work
at 100 MHz in dual-core mode so this change uses mentioned feature.
For that we:
 * Update axc003_idu.dtsi with mention of really-used CPU clock freq
 * Remove clock override in AXS platform code for dual-core HW

Note we're still leaving a hack for clock "downgrade" on early boot
for quad-core hardware.

Also note this change will break functionality of AXS103 v1.0 hardware.
That means all users of AXS103 __must__ upgrade their boards with the
most recent firmware.
Signed-off-by: NAlexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

776d7f16

13 5月, 2016 2 次提交

arc: axs10x: Add DT bindings for I2S PLL Clock · 445a6421

由 Jose Abreu 提交于 4月 21, 2016

Add device tree bindings for AXS10X I2S PLL Clock driver.
Acked-by: NAlexey Brodkin <abrodkin@synopsys.com>
Acked-by: NVineet Gupta <vgupta@synopsys.com>
Signed-off-by: NJose Abreu <joabreu@synopsys.com>

445a6421

V
ARC: pae: STRICT_MM_TYPECHECKS was broken · 5035cd5b
由 Vineet Gupta 提交于 5月 03, 2016
```
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>
```
5035cd5b

09 5月, 2016 30 次提交

ARC: Add eznps platform to Kconfig and Makefile · 96665789

由 Noam Camus 提交于 10月 16, 2015

This commit should be left last since only now eznps platform
is in state which one can actually use.
Signed-off-by: NNoam Camus <noamc@ezchip.com>

96665789

ARC: [plat-eznps] Use dedicated COMMAND_LINE_SIZE · 085572f3

由 Noam Camus 提交于 5月 29, 2015

The default 256 bytes sometimes is just not enough.
We usually provide earlycon=... and console=... and ip=...
All this and more may need more room.
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Acked-by: NVineet Gupta <vgupta@synopsys.com>

085572f3

ARC: [plat-eznps] Use dedicated cpu_relax() · 46c3e6b8

由 Tal Zilcer 提交于 3月 09, 2015

Since the CTOP is SMT hardware multi-threaded, we need to hint
the HW that now will be a very good time to do a hardware
thread context switching. This is done by issuing the schd.rw
instruction (binary coded here so as to not require specific
revision of GCC to build the kernel).
sched.rw means that Thread becomes eligible for execution by
the threads scheduler after all pending read/write
transactions were completed.

Implementing cpu_relax_lowlatency() with barrier()
Since with current semantics of cpu_relax() it may take a
while till yielded CPU will get back.
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: NVineet Gupta <vgupta@synopsys.com>

46c3e6b8

ARC: [plat-eznps] Use dedicated identity auxiliary register. · 86c25466

由 Noam Camus 提交于 6月 03, 2013

With generic "identity" num of CPUs is limited to 256 (8 bit).
We use our alternative AUX register GLOBAL_ID (12 bit).
Now we can support up to 4096 CPUs.
Signed-off-by: NNoam Camus <noamc@ezchip.com>

86c25466

ARC: [plat-eznps] Use dedicated SMP barriers · b1f2f6f3

由 Noam Camus 提交于 4月 10, 2015

NPS device got 256 cores and each got 16 HW threads (SMT).
We use EZchip dedicated ISA to trigger HW scheduler of the
core that current HW thread belongs to.
This scheduling makes sure that data beyond barrier is available
to all HW threads in core and by that to all in device (4K).
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Cc: Peter Zijlstra <peterz@infradead.org>

b1f2f6f3

ARC: [plat-eznps] Use dedicated atomic/bitops/cmpxchg · a5a10d99

由 Noam Camus 提交于 5月 16, 2015

We need our own implementaions since we lack LLSC support.
Our extended ISA provided with optimized solution for all 32bit
operations we see in these three headers.
Signed-off-by: NNoam Camus <noamc@ezchip.com>

a5a10d99

ARC: [plat-eznps] Use dedicated user stack top · 8bcf2c48

由 Noam Camus 提交于 12月 06, 2015

NPS use special mapping right below TASK_SIZE.
Hence we need to lower STACK_TOP so that user stack won't
overlap NPS special mapping.
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Acked-by: NVineet Gupta <vgupta@synopsys.com>

8bcf2c48

ARC: [plat-eznps] Add eznps platform · 4a66d3fe

由 Noam Camus 提交于 8月 19, 2012

This platform include boards:
	Hardware Emulator (HE)
	Simulator based upon nSIM.
Signed-off-by: NNoam Camus <noamc@ezchip.com>

4a66d3fe

N
ARC: [plat-eznps] Add eznps board defconfig and dts · 845033be
由 Noam Camus 提交于 9月 10, 2012
```
Adding default configuration file and DTS file
Signed-off-by: NNoam Camus <noamc@ezchip.com>
```
845033be

ARC: Mark secondary cpu online only after all HW setup is done · 71f9cf8f

由 Noam Camus 提交于 11月 07, 2015

In SMP setup, master loops for each_present_cpu calling cpu_up().
For ARC it returns as soon as new cpu's status becomes online,
However secondary may still do HW initializing,
machine or platform hook level.

So turn secondary online only after all HW setup is done.
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Acked-by: NVineet Gupta <vgupta@synopsys.com>

71f9cf8f

ARC: rwlock: disable interrupts in !LLSC variant · 2a1021fc

由 Noam Camus 提交于 6月 09, 2015

If we hold rwlock and interrupt occures we may
end up spinning on it for ever during softirq.
Note that this lock is an internal lock
and since the lock is free to be used from any context,
the lock needs to be IRQ-safe.

Below you may see an example for interrupt we get while
nl_table_lock is holding its rw->lock_mutex and we spinned
on it for ever.

The concept for the fix was taken from SPARC.

[2015-05-12 19:16:12] Stack Trace:
[2015-05-12 19:16:12]   arc_unwind_core+0xb8/0x11c
[2015-05-12 19:16:12]   dump_stack+0x68/0xac
[2015-05-12 19:16:12]   _raw_read_lock+0xa8/0xac
[2015-05-12 19:16:12]   netlink_broadcast_filtered+0x56/0x35c
[2015-05-12 19:16:12]   nlmsg_notify+0x42/0xa4
[2015-05-12 19:16:13]   neigh_update+0x1fe/0x44c
[2015-05-12 19:16:13]   neigh_event_ns+0x40/0xa4
[2015-05-12 19:16:13]   arp_process+0x46e/0x5a8
[2015-05-12 19:16:13]   __netif_receive_skb_core+0x358/0x500
[2015-05-12 19:16:13]   process_backlog+0x92/0x154
[2015-05-12 19:16:13]   net_rx_action+0xb8/0x188
[2015-05-12 19:16:13]   __do_softirq+0xda/0x1d8
[2015-05-12 19:16:14]   irq_exit+0x8a/0x8c
[2015-05-12 19:16:14]   arch_do_IRQ+0x6c/0xa8
[2015-05-12 19:16:14]   handle_interrupt_level1+0xe4/0xf0
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Acked-by: NPeter Zijlstra <peterz@infradead.org>

2a1021fc

ARC: Make vmalloc size configurable · 15ca68a9

由 Noam Camus 提交于 9月 07, 2014

On ARC, lower 2G of address space is translated and used for
 - user vaddr space (region 0 to 5)
 - unused kernel-user gutter (region 6)
 - kernel vaddr space (region 7)

where each region simply represents 256MB of address space.

The kernel vaddr space of 256MB is used to implement vmalloc, modules
So far this was enough, but not on EZChip system with 4K CPUs (given
that per cpu mechanism uses vmalloc for allocating chunks)

So allow VMALLOC_SIZE to be configurable by expanding down into the unused
kernel-user gutter region which at default 256M was excessive anyways.

Also use _BITUL() to fix a build error since PGDIR_SIZE cannot use "1UL"
as called from assembly code in mm/tlbex.S
Signed-off-by: NNoam Camus <noamc@ezchip.com>
[vgupta: rewrote changelog, debugged bootup crash due to int vs. hex]
Acked-by: NVineet Gupta <vgupta@synopsys.com>

15ca68a9

ARC: clean out UAPI byteorder.h clean off Kconfig symbol · 4bb40c6d

由 Noam Camus 提交于 1月 13, 2016

UAPI header should not use Kconfig items

Use __BIG_ENDIAN__ defined as a compiler intrinsic
Signed-off-by: NNoam Camus <noamc@ezchip.com>
[vgupta: fix changelog]
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

4bb40c6d

ARC: RIP arc_{get|set}_core_freq() clk API · 6e9318d1

由 Alexey Brodkin 提交于 2月 01, 2016

There are no more users of this - so RIP!
Signed-off-by: NAlexey Brodkin <abrodkin@synopsys.com>
[vgupta: update changelog]
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

6e9318d1

ARC: Don't try to use value of top level clock-frequency in DT · 20c7dbbd

由 Alexey Brodkin 提交于 2月 01, 2016

We no longer use it and instead a real clk device such as fixed-clk
instance is fed to timers etc.
Signed-off-by: NAlexey Brodkin <abrodkin@synopsys.com>
[vgupta: broken out of a bigger patch, rewrote changelog]
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

20c7dbbd

ARC: use fixed frequencies in arc_set_early_base_baud() · 0e6e9b64

由 Alexey Brodkin 提交于 2月 01, 2016

UARTs usually have fixed clock so we're switching to use of
constant values instead of something derived from core clock
frequency.

Among other things this will allow us to get rid of
arc_{get|set}_core_freq() and switch to generic clock
framework later on.
Acked-by: NChristian Ruppert <christian.ruppert@alitech.com>
Signed-off-by: NAlexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

0e6e9b64

ARC: [intc-*] switch to linear domain · d21beffb

由 Vineet Gupta 提交于 1月 28, 2016

Now that we have Timers probed from DT, don't need legacy domain

This however requires mapping to be called explicitly for the IRQ which
still can't (and probably never) be probed from DT such as IPI and
SOFTIRQ
Acked-by: NMarc Zyngier <marc.zyngier@arm.com>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

d21beffb

ARC: [intc-*] Do a domain lookup in primary handler for hwirq -> linux virq · 1b0ccb8a

由 Vineet Gupta 提交于 1月 01, 2016

The primary interrupt handler arch_do_IRQ() was passing hwirq as linux
virq to core code. This was fragile and worked so far as we only had legacy/linear
domains.

This came out of a rant by Marc Zyngier.
http://lists.infradead.org/pipermail/linux-snps-arc/2015-December/000298.html

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Noam Camus <noamc@ezchip.com>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

1b0ccb8a

ARC: irq: export some IRQs again · 88555cc5

由 Vineet Gupta 提交于 3月 30, 2016

This will be needed for switching to linear irq domain as
irq_create_mapping() called by intr code needs the IRQ numbers
in addition to existing usage in mcip.c for requesting the irq
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

88555cc5

ARC: clocksource: DT based probe · e608b53e

由 Vineet Gupta 提交于 1月 01, 2016

- Remove explicit clocksource setup and let it be done by OF framework
  by defining CLOCKSOURCE_OF_DECLARE() for various timers

- This allows multiple clocksources to be potentially registered
  simultaneouly: previously we could only do one - as all of them had
  same arc_counter_setup() routine for registration

- Setup routines also ensure that the underlying timer actually exists.

- Remove some of the panic() calls if underlying timer is NOT detected as
  fallback clocksource might still be available
  1. If GRFC doesn't exist, jiffies clocksource gets registered anyways
  2. if RTC doesn't exist, TIMER1 can take over (as it is always
     present)

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

e608b53e

ARC: clockevent: DT based probe · 77c8d0d6

由 Vineet Gupta 提交于 1月 01, 2016

 - timer frequency is derived from DT (no longer rely on top level
   DT "clock-frequency" probed early and exported by asm/clk.h)

 - TIMER0_IRQ need not be exported across arch code, confined to intc as
   it is property of same

 - Any failures in clockevent setup are considered pedantic and system
   panic()'s as there is no generic fallback (unlike clocksource where
   a jiffies based soft clocksource always exists)
Acked-by: NDaniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

77c8d0d6

ARC: [dts] Introduce Timer bindings · 7ec9f34a

由 Vineet Gupta 提交于 1月 01, 2016

ARC Timers have historically been probed directly.
As precursor to start probing Timers thru DT introduce these bindings
Note that to keep series bisectable, these bindings are not yet used in
code.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: devicetree@vger.kernel.org
Acked-by: NRob Herring <robh@kernel.org>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

7ec9f34a

ARC: [dts] Add clk feeding into timers to DTs · b3d6aba8

由 Vineet Gupta 提交于 1月 01, 2016

This allows us to introduce timers in DT in next commit

The core clk frequency hack in AXS103 platform is also extended,
where the core clk feeding into timers is updated in-place in FDT.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Rob Herring <robh@kernel.org>
Cc: devicetree@vger.kernel.org
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

b3d6aba8

ARC: [dts] Rename cpu_intc -> core_intc · 9ba7648c

由 Vineet Gupta 提交于 1月 28, 2016

This is again for future changes to use common DTSI for timers which
refer to @core_intc
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

9ba7648c

ARCv2: [dts]: Introduce HS38 specific include DTS ... · 2e8cd938

由 Vineet Gupta 提交于 1月 19, 2016

... and add them to plat-sim DTS.

This allows for future change to introduce timers in DT in single place
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

2e8cd938

ARC: clockevent: Prepare for DT based probe · 69fbd098

由 Noam Camus 提交于 1月 14, 2016

 - call clocksource_probe()
 - This in turns needs of_clk_init() to be called earlier

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: NNoam Camus <noamc@ezchip.com>
[vgupta: broken off from a bigger patch]
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

69fbd098

ARC: clockevent: switch to cpu notifier for clockevent setup · eec3c58e

由 Noam Camus 提交于 1月 01, 2016

ARC Timers so far have been handled as "legacy" w/o explicit description
in DT. This poses challenge for newer platforms wanting to use them.
This series will eventually help move timers over to DT.

This patch does a small change of using a CPU notifier to set clockevent
on non-boot CPUs. So explicit setup is done only on boot CPU (which will
later be done by DT)
Signed-off-by: NNoam Camus <noamc@ezchip.com>
[vgupta: broken off from a bigger patch]
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

eec3c58e

V
ARC: [plat-axs] Refactor core freq get/set · 0eeb3dfe
由 Vineet Gupta 提交于 4月 05, 2016
```
Reduces diff in future patches !
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>
```
0eeb3dfe

ARC: opencode arc_request_percpu_irq · 56957940

由 Vineet Gupta 提交于 1月 28, 2016

- The idea is to remove the API usage since it has a subltle
  design flaw - relies on being called on cpu0 first. This is true for
  some early per cpu irqs such as TIMER/IPI, but not for late probed
  per cpu peripherals such a perf. And it's usage in perf has already
  bitten us once: see c6317bc7
  ("ARCv2: perf: Ensure perf intr gets enabled on all cores") where we
  ended up open coding it anyways

- The seeming duplication will go away once we start using cpu notifier
  for timer setup
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

56957940

ARC: [intc-compact] setup TIMER as percpu_dev · db4c4426

由 Vineet Gupta 提交于 1月 28, 2016

This removes the quirk from arc_request_percpu_irq() and paves way for
future simplifications
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

db4c4426

05 5月, 2016 3 次提交

ARC: support HIGHMEM even without PAE40 · 26f9d5fd

由 Vineet Gupta 提交于 4月 18, 2016

Initial HIGHMEM support on ARC was introduced for PAE40 where the low
memory (0x8000_0000 based) and high memory (0x1_0000_0000) were
physically contiguous. So CONFIG_FLATMEM sufficed (despite a peipheral
hole in the middle, which wasted a bit of struct page memory, but things
worked).

However w/o PAE, highmem was not possible and we could only reach
~1.75GB of DDR. Now there is a use case to access ~4GB of DDR w/o PAE40
The idea is to have low memory at canonical 0x8000_0000 and highmem
at 0 so enire 4GB address space is available for physical addressing
This needs additional platform/interconnect mapping to convert
the non contiguous physical addresses into linear bus adresses.

From Linux point of view, non contiguous divide means FLATMEM no
longer works and DISCONTIGMEM is needed to track the pfns in the 2
regions.

This scheme would also work for PAE40, only better in that we don't
waste struct page memory for the peripheral hole.

The DT description will be something like

    memory {
        ...
        reg = <0x80000000 0x200000000   /* 512MB: lowmem */
               0x00000000 0x10000000>;  /* 256MB: highmem */
   }
Signed-off-by: NNoam Camus <noamc@ezchip.com>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

26f9d5fd

ARC: Fix PAE40 boot failures due to PTE truncation · 2519d753

由 Vineet Gupta 提交于 5月 05, 2016

So a benign looking cleanup which macro'ized PAGE_SHIFT shifts turned
out to be bad (since it was done non-sensically across the board).

It caused boot failures with PAE40 as forced cast to (unsigned long)
from newly introduced virt_to_pfn() was causing truncatiion of the
(long long) pte/paddr values.

It is OK to use this in accessors dealing with kernel virtual address,
pointers etc, but not for PTE values themelves.

Fixes: cJ2ff5cf2735c ("ARC: mm: Use virt_to_pfn() for addr >> PAGE_SHIFT pattern)
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

2519d753

ARC: Add missing io barriers to io{read,write}{16,32}be() · e5bc0478

由 Vineet Gupta 提交于 5月 05, 2016

While reviewing a different change to asm-generic/io.h Arnd spotted that
ARC ioread32 and ioread32be both of which come from asm-generic versions
are not symmetrical in terms of calling the io barriers.

generic ioread32   -> ARC readl()                  [ has barriers]
generic ioread32be -> __be32_to_cpu(__raw_readl()) [ lacks barriers]

While generic ioread32be is being remediated to call readl(), that involves
a swab32(), causing double swaps on ioread32be() on Big Endian systems.

So provide our versions of big endian IO accessors to ensure io barrier
calls while also keeping them optimal
Suggested-by: NArnd Bergmann <arnd@arndb.de>
Acked-by: NArnd Bergmann <arnd@arndb.de>
Cc: stable@vger.kernel.org  [4.2+]
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

e5bc0478

02 5月, 2016 1 次提交

dmaengine: dw: revisit data_width property · 2e65060e

由 Andy Shevchenko 提交于 4月 27, 2016

There several changes are done here:

- Convert the property to be in bytes

  Besides that this is a common practice for such property, the use of a value
  in bytes much more convenient than handling the encoded one.

- Rename data_width to data-width in the device tree bindings

  The change leaves the support for the old format as well just in case someone
  will use a newer kernel with an old device tree blob.

- While here, replace dwc_fast_ffs() by __ffs()
Signed-off-by: NAndy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: NViresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: NVinod Koul <vinod.koul@intel.com>

2e65060e

openanolis / cloud-kernel 接近 2 年 前同步成功

openanolis / cloud-kernel
接近 2 年前同步成功