提交 · fff7fb0b2d908dec779783d8eaf3d7725230f75e · openanolis / cloud-kernel

21 5月, 2016 1 次提交

lib/GCD.c: use binary GCD algorithm instead of Euclidean · fff7fb0b

由 Zhaoxiu Zeng 提交于 5月 20, 2016

The binary GCD algorithm is based on the following facts:
	1. If a and b are both even, then gcd(a,b) = 2 * gcd(a/2, b/2)
	2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
	3. If a and b are both odd, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)

Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% of the execution time) than the
division-based Euclidean algorithm.

On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.

There are two variants of the code here, depending on whether a fast
__ffs (find least significant set bit) instruction is available.  This
allows the unpredictable branches in the bit-at-a-time shifting loop to
be eliminated.

If fast __ffs is not available, the "even/odd" GCD variant is used.

I use the following code to benchmark:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <string.h>
	#include <time.h>
	#include <unistd.h>

	/*
	 * swap(a, b) - exchange the values held by two lvalues of the same type.
	 *
	 * A temporary is used instead of the XOR trick so that swap(x, x) leaves
	 * x unchanged (the XOR form would zero it), and the arguments are
	 * parenthesized so that expressions such as *p or v[i] expand safely.
	 * Relies on the GNU __typeof__ extension.  Each argument is evaluated
	 * more than once, so it must not have side effects.
	 */
	#define swap(a, b) \
		do { \
			__typeof__(a) swap_tmp_ = (a); \
			(a) = (b); \
			(b) = swap_tmp_; \
		} while (0)

	/*
	 * gcd0 - reference implementation: division-based Euclidean algorithm.
	 *
	 * Returns the greatest common divisor of a and b.  If one of them is
	 * zero the other one is returned.
	 */
	unsigned long gcd0(unsigned long a, unsigned long b)
	{
		unsigned long rem;

		/* Keep the larger operand in a. */
		if (a < b)
			swap(a, b);

		if (!b)
			return a;

		/* Replace (a, b) by (b, a mod b) until the remainder vanishes. */
		for (rem = a % b; rem != 0; rem = a % b) {
			a = b;
			b = rem;
		}

		return b;
	}

	/*
	 * gcd1 - binary GCD built on count-trailing-zeros (__builtin_ctzl).
	 *
	 * Both operands are reduced to odd values, the smaller one is repeatedly
	 * subtracted from the larger one, and the power of two common to both
	 * inputs (taken from a | b) is shifted back in on return.  If either
	 * input is zero, a | b is returned.
	 */
	unsigned long gcd1(unsigned long a, unsigned long b)
	{
		unsigned long both = a | b;

		if (a == 0 || b == 0)
			return both;

		b >>= __builtin_ctzl(b);

		while (1) {
			/* a is non-zero here, so the trailing-zero count is defined. */
			a >>= __builtin_ctzl(a);
			if (a == b)
				break;

			if (a < b)
				swap(a, b);
			a -= b;
		}

		return a << __builtin_ctzl(both);
	}

	/*
	 * gcd2 - binary GCD, "even/odd" variant that shifts one bit at a time.
	 *
	 * Instead of stripping the common power of two and restoring it at the
	 * end, the lowest set bit of a | b is kept as a mask and both operands
	 * are normalised so that this bit is their lowest set bit.  If either
	 * input is zero, a | b is returned.
	 */
	unsigned long gcd2(unsigned long a, unsigned long b)
	{
		unsigned long low = a | b;

		if (a == 0 || b == 0)
			return low;

		/* Isolate the lowest set bit of a | b. */
		low &= -low;

		while ((b & low) == 0)
			b >>= 1;

		while (1) {
			while ((a & low) == 0)
				a >>= 1;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			/*
			 * If the halved difference still has the mask bit set,
			 * adding b (whose lowest set bit is the mask bit) clears
			 * it before the second halving.
			 */
			if ((a & low) != 0)
				a += b;
			a >>= 1;
		}
	}

	/*
	 * gcd3 - gcd1 with early exits once an odd operand has become 1.
	 *
	 * When either reduced operand equals 1 the result is just the lowest set
	 * bit of a | b, so the subtraction loop is left immediately.  If either
	 * input is zero, a | b is returned.
	 */
	unsigned long gcd3(unsigned long a, unsigned long b)
	{
		unsigned long both = a | b;

		if (a == 0 || b == 0)
			return both;

		b >>= __builtin_ctzl(b);
		if (b == 1)
			return both & -both;

		a >>= __builtin_ctzl(a);
		while (a != 1 && a != b) {
			if (a < b)
				swap(a, b);
			a -= b;
			a >>= __builtin_ctzl(a);
		}

		if (a == 1)
			return both & -both;

		return a << __builtin_ctzl(both);
	}

	/*
	 * gcd4 - gcd2 with early exits once an operand equals the mask bit.
	 *
	 * The mask is the lowest set bit of a | b.  As soon as one normalised
	 * operand is equal to the mask, the mask itself is returned.  If either
	 * input is zero, a | b is returned.
	 */
	unsigned long gcd4(unsigned long a, unsigned long b)
	{
		unsigned long low = a | b;

		if (a == 0 || b == 0)
			return low;

		/* Isolate the lowest set bit of a | b. */
		low &= -low;

		while ((b & low) == 0)
			b >>= 1;
		if (b == low)
			return low;

		while (1) {
			while ((a & low) == 0)
				a >>= 1;
			if (a == low)
				return low;
			if (a == b)
				return a;

			if (a < b)
				swap(a, b);
			a -= b;
			a >>= 1;
			/* Adding b clears the mask bit again before the second halving. */
			if ((a & low) != 0)
				a += b;
			a >>= 1;
		}
	}

	/* Implementations under test, indexed in the order they are reported. */
	static unsigned long (*gcd_func[])(unsigned long, unsigned long) = {
		gcd0,
		gcd1,
		gcd2,
		gcd3,
		gcd4,
	};

	/* Number of entries in gcd_func[]. */
	#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(*gcd_func))

	#if defined(__x86_64__)

	/*
	 * rdtscll(val) - read the x86 time-stamp counter into val.
	 *
	 * Executes the rdtsc instruction, takes its outputs from the "a" and "d"
	 * registers, and stores in val the "a" output OR-ed with the "d" output
	 * shifted left by 32 bits.
	 */
	#define rdtscll(val) do { \
		unsigned long __a,__d; \
		__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
		(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
	} while(0)

	/*
	 * Time one call of a gcd implementation with the time-stamp counter.
	 *
	 * The gcd result is stored in *res; the return value is the number of
	 * counter ticks between the two rdtscll() readings.
	 */
	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
						     unsigned long a, unsigned long b, unsigned long *res)
	{
		unsigned long long t0, t1;
		unsigned long value;

		rdtscll(t0);
		value = gcd(a, b);
		rdtscll(t1);

		*res = value;

		/*
		 * Unsigned subtraction wraps around, so this is also the correct
		 * distance when the second reading is numerically below the first.
		 */
		return t1 - t0;
	}

	#else

	/* Return the current reading of the CLOCK_PROCESS_CPUTIME_ID clock. */
	static inline struct timespec read_time(void)
	{
		struct timespec now;

		clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);

		return now;
	}

	/*
	 * Return the time from start to end in nanoseconds.
	 *
	 * Seconds and nanoseconds are subtracted field by field; a negative
	 * nanosecond difference is fixed up by borrowing one second.
	 */
	static inline unsigned long long diff_time(struct timespec start, struct timespec end)
	{
		struct timespec delta = {
			.tv_sec = end.tv_sec - start.tv_sec,
			.tv_nsec = end.tv_nsec - start.tv_nsec,
		};

		if (delta.tv_nsec < 0) {
			delta.tv_sec -= 1;
			delta.tv_nsec += 1000000000L;
		}

		return delta.tv_sec * 1000000000ULL + delta.tv_nsec;
	}

	/*
	 * Time one call of a gcd implementation with read_time().
	 *
	 * The gcd result is stored in *res; the return value is the difference
	 * between the two clock readings as computed by diff_time().
	 */
	static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
						     unsigned long a, unsigned long b, unsigned long *res)
	{
		struct timespec before, after;
		unsigned long value;

		before = read_time();
		value = gcd(a, b);
		after = read_time();

		*res = value;

		return diff_time(before, after);
	}

	#endif

	/*
	 * get_rand - build a pseudo-random unsigned long from rand().
	 *
	 * When long is 8 bytes wide two rand() results are combined: the first
	 * call is shifted into the upper 32 bits and the second call is OR-ed
	 * into the lower bits.  Otherwise a single rand() value is returned.
	 *
	 * The two calls are made in separate statements because the order in
	 * which the operands of '|' are evaluated is unspecified in C; sequencing
	 * them makes the value stream for a given srand() seed the same with
	 * every compiler.
	 */
	static inline unsigned long get_rand(void)
	{
		unsigned long hi, lo;

		if (sizeof(long) != 8)
			return rand();

		hi = rand();
		lo = rand();

		/*
		 * Shift in two steps: a single "<< 32" is not a valid shift
		 * where unsigned long is only 32 bits wide, even in a branch
		 * that is never taken there.
		 */
		return ((hi << 16) << 16) | lo;
	}

	/*
	 * Benchmark driver.
	 *
	 * Usage: gcd [-n loops] [-r repeats] [-s seed] [b]
	 *
	 * For each of `loops` operand pairs every gcd variant is timed `repeats`
	 * times and the fastest run of each variant is added to its total.  The
	 * first operand is always random; the second one is taken from the
	 * optional positional argument, or is random when none is given.  After
	 * the totals are printed, the results of all variants are compared with
	 * the result of the first one and mismatches are reported on stderr.
	 *
	 * Both counts must be at least 1: the per-pair minimum and the stored
	 * results are only written inside the repeat loop, so running it zero
	 * times would leave them uninitialized.
	 *
	 * Returns 0 after a completed run, EXIT_FAILURE for invalid options or
	 * when the result table cannot be allocated.
	 */
	int main(int argc, char **argv)
	{
		unsigned int seed = time(NULL);
		long loops = 100;
		long repeats = 1000;
		unsigned long (*res)[TEST_ENTRIES];
		unsigned long long elapsed[TEST_ENTRIES];
		unsigned long fixed_b = 0;
		int have_fixed_b;
		int mismatch = 0;
		char *end;
		long j, k;
		size_t i;

		for (;;) {
			int opt = getopt(argc, argv, "n:r:s:");
			/* End condition always first */
			if (opt == -1)
				break;

			switch (opt) {
			case 'n':
				loops = strtol(optarg, &end, 10);
				if (end == optarg || *end != '\0' || loops < 1) {
					fprintf(stderr, "%s: invalid loop count '%s'\n", argv[0], optarg);
					return EXIT_FAILURE;
				}
				break;
			case 'r':
				repeats = strtol(optarg, &end, 10);
				if (end == optarg || *end != '\0' || repeats < 1) {
					fprintf(stderr, "%s: invalid repeat count '%s'\n", argv[0], optarg);
					return EXIT_FAILURE;
				}
				break;
			case 's':
				seed = strtoul(optarg, NULL, 10);
				break;
			default:
				/* getopt() reports unknown options and missing arguments here. */
				fprintf(stderr, "Usage: %s [-n loops] [-r repeats] [-s seed] [b]\n", argv[0]);
				return EXIT_FAILURE;
			}
		}

		/* The optional operand is the same for every pair: parse it once. */
		have_fixed_b = argc > optind;
		if (have_fixed_b)
			fixed_b = strtoul(argv[optind], NULL, 10);

		/* calloc() also guards the loops * sizeof(*res) multiplication. */
		res = calloc((size_t)loops, sizeof(*res));
		if (!res) {
			fprintf(stderr, "%s: cannot allocate result table for %ld loops\n", argv[0], loops);
			return EXIT_FAILURE;
		}
		memset(elapsed, 0, sizeof(elapsed));

		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			unsigned long b = have_fixed_b ? fixed_b : get_rand();
			unsigned long long min_elapsed[TEST_ENTRIES];

			for (k = 0; k < repeats; k++) {
				for (i = 0; i < TEST_ENTRIES; i++) {
					unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);

					/* The first repeat initializes the minimum. */
					if (k == 0 || min_elapsed[i] > tmp)
						min_elapsed[i] = tmp;
				}
			}
			for (i = 0; i < TEST_ENTRIES; i++)
				elapsed[i] += min_elapsed[i];
		}

		for (i = 0; i < TEST_ENTRIES; i++)
			printf("gcd%zu: elapsed %llu\n", i, elapsed[i]);

		/* Replay the same random sequence so failing operands can be printed. */
		srand(seed);
		for (j = 0; j < loops; j++) {
			unsigned long a = get_rand();
			unsigned long b = have_fixed_b ? fixed_b : get_rand();

			for (i = 1; i < TEST_ENTRIES; i++) {
				if (res[j][i] != res[j][0])
					break;
			}
			if (i == TEST_ENTRIES)
				continue;

			if (!mismatch) {
				mismatch = 1;
				fprintf(stderr, "Error:\n");
			}
			fprintf(stderr, "gcd(%lu, %lu): ", a, b);
			for (i = 0; i < TEST_ENTRIES; i++)
				fprintf(stderr, "%lu%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
		}

		if (!mismatch)
			fprintf(stderr, "PASS\n");

		free(res);

		return 0;
	}

Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:

  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 10174
  gcd1: elapsed 2120
  gcd2: elapsed 2902
  gcd3: elapsed 2039
  gcd4: elapsed 2812
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9309
  gcd1: elapsed 2280
  gcd2: elapsed 2822
  gcd3: elapsed 2217
  gcd4: elapsed 2710
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9589
  gcd1: elapsed 2098
  gcd2: elapsed 2815
  gcd3: elapsed 2030
  gcd4: elapsed 2718
  PASS
  zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
  gcd0: elapsed 9914
  gcd1: elapsed 2309
  gcd2: elapsed 2779
  gcd3: elapsed 2228
  gcd4: elapsed 2709
  PASS

[akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Signed-off-by: George Spelvin <linux@horizon.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

fff7fb0b

09 5月, 2016 4 次提交

ARC: Add eznps platform to Kconfig and Makefile · 96665789

由 Noam Camus 提交于 10月 16, 2015

This commit should be left last, since only now is the eznps platform
in a state which one can actually use.
Signed-off-by: Noam Camus <noamc@ezchip.com>

96665789

ARC: Make vmalloc size configurable · 15ca68a9

由 Noam Camus 提交于 9月 07, 2014

On ARC, lower 2G of address space is translated and used for
 - user vaddr space (region 0 to 5)
 - unused kernel-user gutter (region 6)
 - kernel vaddr space (region 7)

where each region simply represents 256MB of address space.

The kernel vaddr space of 256MB is used to implement vmalloc and modules.
So far this was enough, but not on an EZChip system with 4K CPUs (given
that the per-cpu mechanism uses vmalloc for allocating chunks).

So allow VMALLOC_SIZE to be configurable by expanding down into the unused
kernel-user gutter region which at default 256M was excessive anyways.

Also use _BITUL() to fix a build error since PGDIR_SIZE cannot use "1UL"
as called from assembly code in mm/tlbex.S
Signed-off-by: Noam Camus <noamc@ezchip.com>
[vgupta: rewrote changelog, debugged bootup crash due to int vs. hex]
Acked-by: Vineet Gupta <vgupta@synopsys.com>

15ca68a9

ARC: [intc-*] Do a domain lookup in primary handler for hwirq -> linux virq · 1b0ccb8a

由 Vineet Gupta 提交于 1月 01, 2016

The primary interrupt handler arch_do_IRQ() was passing hwirq as linux
virq to core code. This was fragile and worked so far as we only had legacy/linear
domains.

This came out of a rant by Marc Zyngier.
http://lists.infradead.org/pipermail/linux-snps-arc/2015-December/000298.html

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

1b0ccb8a

ARC: clockevent: Prepare for DT based probe · 69fbd098

由 Noam Camus 提交于 1月 14, 2016

 - call clocksource_probe()
 - This in turn needs of_clk_init() to be called earlier

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Noam Camus <noamc@ezchip.com>
[vgupta: broken off from a bigger patch]
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

69fbd098

05 5月, 2016 1 次提交

ARC: support HIGHMEM even without PAE40 · 26f9d5fd

由 Vineet Gupta 提交于 4月 18, 2016

Initial HIGHMEM support on ARC was introduced for PAE40 where the low
memory (0x8000_0000 based) and high memory (0x1_0000_0000) were
physically contiguous. So CONFIG_FLATMEM sufficed (despite a peripheral
hole in the middle, which wasted a bit of struct page memory, but things
worked).

However w/o PAE, highmem was not possible and we could only reach
~1.75GB of DDR. Now there is a use case to access ~4GB of DDR w/o PAE40.
The idea is to have low memory at canonical 0x8000_0000 and highmem
at 0 so the entire 4GB address space is available for physical addressing.
This needs additional platform/interconnect mapping to convert
the non contiguous physical addresses into linear bus addresses.

From Linux point of view, non contiguous divide means FLATMEM no
longer works and DISCONTIGMEM is needed to track the pfns in the 2
regions.

This scheme would also work for PAE40, only better in that we don't
waste struct page memory for the peripheral hole.

The DT description will be something like

    memory {
        ...
        reg = <0x80000000 0x20000000    /* 512MB: lowmem */
               0x00000000 0x10000000>;  /* 256MB: highmem */
   }
Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

26f9d5fd

27 4月, 2016 2 次提交

ARC: add support for reserved memory defined by device tree · 1b10cb21

由 Alexey Brodkin 提交于 4月 26, 2016

Enable reserved memory initialization from device tree.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

1b10cb21

ARC: support generic per-device coherent dma mem · 32ed9a0e

由 Alexey Brodkin 提交于 4月 26, 2016

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

32ed9a0e

07 4月, 2016 1 次提交

ARC: Don't source drivers/pci/pcie/Kconfig ourselves · 732dc97b

由 Andreas Ziegler 提交于 3月 15, 2016

Commit 5f8fc432 ("PCI: Include pci/pcie/Kconfig directly from
pci/Kconfig") in linux-next changed drivers/pci/Kconfig to include
drivers/pci/pcie/Kconfig itself, so that architectures do not need
to source both files themselves. ARC just recently gained PCI support
through commit 6b3fb77998dd ("ARC: Add PCI support"), but this change
was based on the old behaviour of the Kconfig files. This makes
Kconfig now spit out the following warnings:

drivers/pci/pcie/Kconfig:61:warning: choice value used outside its choice group
drivers/pci/pcie/Kconfig:67:warning: choice value used outside its choice group
drivers/pci/pcie/Kconfig:74:warning: choice value used outside its choice group

This change updates the Kconfig file for ARC, dropping the now
unnecessary 'source' statement, which makes the warning disappear.
Signed-off-by: Andreas Ziegler <andreas.ziegler@fau.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

732dc97b

19 3月, 2016 1 次提交
- V
  ARC: dma: reintroduce platform specific dma<->phys · f2e3d553
  由 Vineet Gupta 提交于 3月 16, 2016
```
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
```
  f2e3d553
15 3月, 2016 1 次提交
- V
  ARCv2: LLSC: software backoff is NOT needed starting HS2.1c · b31ac426
  由 Vineet Gupta 提交于 3月 15, 2016
```
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
```
  b31ac426
12 3月, 2016 1 次提交

ARCv2: Allow enabling PAE40 w/o HIGHMEM · 7cab91b8

由 Vineet Gupta 提交于 2月 17, 2016

This allows for regression testing in PAE specific code as we lack
a 32+ bit physical memory platform other than nSIM.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

7cab91b8

11 3月, 2016 1 次提交

ARC: Add PCI support · c1678ffc

由 Joao Pinto 提交于 3月 10, 2016

Add PCI support to ARC and update drivers/pci Makefile enabling the ARC
arch to use the generic PCI setup functions.

[bhelgaas: fold in Joao's pci-dma-compat.h & pci-bridge.h build fix (I
should have caught this myself, sorry)]
Signed-off-by: Joao Pinto <jpinto@synopsys.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>

c1678ffc

24 2月, 2016 1 次提交

ARC: SMP: No need for CONFIG_ARC_IPI_DBG · d73b73f5

由 Vineet Gupta 提交于 2月 19, 2016

This was more relevant during SMP bringup.

The warning for bogus msg better be visible always.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

d73b73f5

23 2月, 2016 1 次提交

arc: get rid of DEVTMPFS dependency on INITRAMFS_SOURCE · 3e5177c1

由 Alexey Brodkin 提交于 2月 20, 2016

Even though DEVTMPFS is required when our pre-built initramfs
is used it is not the case in general. It is perfectly possible
to use initramfs with device nodes already populated or there
could be other usages, see discussion below for more details:
http://thread.gmane.org/gmane.comp.embedded.openwrt.devel/37819/focus=37821

This change removes mentioned dependency from arch/arc/Kconfig
updating instead those defconfigs that are usually used with this
kind of pre-built initramfs.

And while at it all touched defconfigs were regenerated via
savedefconfig and some options were removed:
 * USB is selected by other options implicitly
 * VGA_CONSOLE is disabled for ARC since
   031e29b5
 * EXT3_FS automatically selects EXT4_FS
 * MTDxxx and JFFS2_FS make no sense for AXS because
   AXS NAND controller is not upstreamed
 * NET_OSCI_LAN is not in upstream as well
 * ARCPGU_xxx options make no sense because ARC PGU is not yet
   in upstream and when it gets there all config options would
   be taken from devicetree
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

3e5177c1

18 2月, 2016 1 次提交

ARC: Assume multiplier is always present · 0eca6fdb

由 Vineet Gupta 提交于 2月 16, 2016

It is unlikely that designs running Linux will not have a multiplier.
Further, the current support is not complete as the tools don't generate a
multilib w/o multiplier.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

0eca6fdb

12 2月, 2016 1 次提交

ARC: mm: Introduce explicit super page size support · 37eda9df

由 Vineet Gupta 提交于 2月 10, 2016

MMUv4 supports 2 concurrent page sizes: Normal and Super [4K to 16M]

So far Linux supported a single super page size for a given Normal page,
depending on the software page walking address split.
e.g. we had 11:8:13 address split for 8K page, which meant super page
was 2 ^(8+13) = 2M (given that THP size has to be PMD_SHIFT)

Now we turn this around, by allowing multiple Super Pages in Kconfig
(currently 2M and 16M only) and forcing page walker address split to
PGDIR_SHIFT and PAGE_SHIFT

For configs without Super page, things are same as before and
PGDIR_SHIFT can be hacked to get non default address split

The motivation for this change is a customer who needs 16M super page
and a 8K Normal page combo.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

37eda9df

29 1月, 2016 1 次提交

ARCv2: clocksource: Rename GRTC -> GFRC ... · d584f0fb

由 Vineet Gupta 提交于 1月 22, 2016

... it is now called Global Free Running Counter
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

d584f0fb

21 1月, 2016 2 次提交

dma-mapping: always provide the dma_map_ops based implementation · e1c7e324

由 Christoph Hellwig 提交于 1月 20, 2016

Move the generic implementation to <linux/dma-mapping.h> now that all
architectures support it and remove the HAVE_DMA_ATTR Kconfig symbol now
that everyone supports them.

[valentinrothberg@gmail.com: remove leftovers in Kconfig]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Helge Deller <deller@gmx.de>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Steven Miao <realmz6@gmail.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Valentin Rothberg <valentinrothberg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

e1c7e324

arc: convert to dma_map_ops · 052c96db

由 Christoph Hellwig 提交于 1月 20, 2016

[vgupta@synopsys.com: ARC: dma mapping fixes #2]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Carlos Palminha <CARLOS.PALMINHA@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

052c96db

17 1月, 2016 1 次提交

Kconfig: remove HAVE_LATENCYTOP_SUPPORT · da48d094

由 Will Deacon 提交于 1月 15, 2016

As illustrated by commit a3afe70b ("[S390] latencytop s390
support."), HAVE_LATENCYTOP_SUPPORT is defined by an architecture to
advertise an implementation of save_stack_trace_tsk.

However, as of 9212ddb5 ("stacktrace: provide save_stack_trace_tsk()
weak alias") a dummy implementation is provided if STACKTRACE=y.  Given
that LATENCYTOP already depends on STACKTRACE_SUPPORT and selects
STACKTRACE, we can remove HAVE_LATENCYTOP_SUPPORT altogether.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Helge Deller <deller@gmx.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

da48d094

17 12月, 2015 1 次提交

ARC: [plat-sim] unbork non default CONFIG_LINUX_LINK_BASE · ff1c0b6a

由 Vineet Gupta 提交于 12月 15, 2015

HIGHMEM support bumped the default memory size for nsim platform to 1G.
Thus total memory ended at the very edge of start of peripherals address
space. With linux link base shifted, memory started bleeding into
peripheral space which caused early boot bad_page spew !

Fixes: 29e33226 ("ARC: mm: HIGHMEM: populate high memory from DT")
Reported-by: Anton Kolesov <akolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

ff1c0b6a

29 10月, 2015 1 次提交

ARC: mm: PAE40 support · 5a364c2a

由 Vineet Gupta 提交于 2月 06, 2015

This is the first working implementation of 40-bit physical address
extension on ARCv2.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

5a364c2a

28 10月, 2015 2 次提交

ARC: mm: HIGHMEM: kmap API implementation · 45890f6d

由 Vineet Gupta 提交于 3月 09, 2015

Implement kmap* API for ARC.

This enables
 - permanent kernel maps (pkmaps): :kmap() API
 - fixmap : kmap_atomic()

We use a very simple/uniform approach for both (unlike some of the other
arches). So fixmap doesn't use the customary compile time address stuff.
The important semantic is sleep'ability (pkmap) vs. not (fixmap) which
the API guarantees.

Note that this patch only enables highmem for subsequent PAE40 support
as there is no real highmem for ARC in pure 32-bit paradigm as explained
below.

ARC has 2:2 address split of the 32-bit address space with lower half
being translated (virtual) while upper half untranslated
(0x8000_0000 to 0xFFFF_FFFF). The kernel itself is linked at base of
untranslated space (i.e. 0x8000_0000 onwards), which is mapped to say
DDR 0x0 by external Bus Glue logic (outside the core). So kernel can
potentially access 1.75G worth of memory directly w/o need for highmem.
(the top 256M is taken by uncached peripheral space from 0xF000_0000 to
0xFFFF_FFFF)

In PAE40, hardware can address memory beyond 4G (0x1_0000_0000) while
the logical/virtual addresses remain 32-bits. Thus highmem is required
for kernel proper to be able to access these pages for its own purposes
(user space is agnostic to this anyways).
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

45890f6d

ARC: boot: Support Halt-on-reset and Run-on-reset SMP booting modes · 3971cdc2

由 Vineet Gupta 提交于 10月 09, 2015

For Run-on-reset, non masters need to spin wait. For Halt-on-reset they
can jump to entry point directly.

Also while at it, made reset vector handler as "the" entry point for
kernel including host debugger based boot (which uses the ELF header
entry point)
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

3971cdc2

17 10月, 2015 2 次提交

V
ARC: mm: MMU v1..v3 only selectable for ARCompact ISA based cores · c583ee4f
由 Vineet Gupta 提交于 9月 29, 2015
```
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
```
c583ee4f

ARCv2: mm: THP support · fe6c1b86

由 Vineet Gupta 提交于 7月 08, 2014

MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
support.

Normal and Super pages can co-exist (of course not overlap) in TLB with a
new bit "SZ" in TLB page descriptor to distinguish between them.
Super Page size is configurable in hardware (4K to 16M), but fixed once
RTL builds.

The exact THP size a Linux configuration will support is a function of:
 - MMU page size (typical 8K, RTL fixed)
 - software page walker address split between PGD:PTE:PFN (typical
   11:8:13, but can be changed with 1 line)

So for above default, THP size supported is 8K * 256 = 2M

Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
reduces to 1 level (as PTE is folded into PGD and canonically referred
to as PMD).

Thus thp PMD accessors are implemented in terms of PTE (just like sparc)
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

fe6c1b86

20 8月, 2015 1 次提交

ARC: Enable HAVE_FUTEX_CMPXCHG · 5e057429

由 Vineet Gupta 提交于 8月 06, 2015

ARC doesn't need the runtime detection of futex cmpxchg op

Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

5e057429

11 8月, 2015 1 次提交

ARC: Enable optimistic spinning for LLSC config · 2a440168

由 Vineet Gupta 提交于 8月 08, 2015

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

2a440168

04 8月, 2015 1 次提交

ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff · e78fdfef

由 Vineet Gupta 提交于 7月 14, 2015

This is to workaround the llock/scond livelock

HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
coherency transactions in the SCU. The exclusive line state keeps rotating
among contending cores leading to a never ending cycle. So break the cycle
by deferring the retry of failed exclusive access (SCOND). The actual delay
needed is function of number of contending cores as well as the unrelated
coherency traffic from other cores. To keep the code simple, start off with
small delay of 1 which would suffice most cases and in case of contention
double the delay. Eventually the delay is sufficient such that the coherency
pipeline is drained, thus a subsequent exclusive access would succeed.

Link: http://lkml.kernel.org/r/1438612568-28265-1-git-send-email-vgupta@synopsys.com
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

e78fdfef

23 7月, 2015 1 次提交

ARCv2: allow selection of page size for MMUv4 · 450ed0db

由 Alexey Brodkin 提交于 7月 16, 2015

MMUv4 also supports the configurable page size as MMUv3.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

450ed0db

20 7月, 2015 1 次提交

ARCv2: add knob for DIV_REV in Kconfig · d05a76ab

由 Alexey Brodkin 提交于 7月 16, 2015

Being highly configurable core ARC HS among other features might be
configured with or without DIV_REM_OPTION (hardware divider).

That option when enabled adds following instructions: div, divu, rem, remu.

By default ARC HS38 has this option enabled. So we add here possibility
to disable usage of hardware divider by compiler.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

d05a76ab

06 7月, 2015 1 次提交
- V
  ARC: Kconfig: better way to disable ARC_HAS_LLSC for ARC_CPU_750D · 14a0abfc
  由 Vineet Gupta 提交于 6月 26, 2015
```
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
```
  14a0abfc
25 6月, 2015 1 次提交
- V
  ARCv2: All bits in place, allow ARCv2 builds · 65bfbcdf
  由 Vineet Gupta 提交于 3月 09, 2015
```
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
```
  65bfbcdf
22 6月, 2015 6 次提交

ARCv2: SMP: clocksource: Enable Global Real Time counter · 72d72880

由 Vineet Gupta 提交于 12月 24, 2014

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

72d72880

ARCv2: SMP: ARConnect debug/robustness · aa6083ed

由 Vineet Gupta 提交于 11月 07, 2014

- Handle possible interrupt coalescing from MCIP
- chk if prev IPI ack before sending new
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

aa6083ed

ARCv2: SMP: Support ARConnect (MCIP) for Inter-Core-Interrupts et al · 82fea5a1

由 Vineet Gupta 提交于 9月 10, 2014

Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

82fea5a1

ARCv2: clocksource: Introduce 64bit local RTC counter · aa93e8ef

由 Vineet Gupta 提交于 11月 07, 2013

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: NVineet Gupta <vgupta@synopsys.com>

aa93e8ef

ARCv2: MMUv4: cache programming model changes · d1f317d8

由 Vineet Gupta 提交于 4月 06, 2015

Caveats about cache flush on ARCv2 based cores

- dcache is PIPT so paddr is sufficient for cache maintenance ops (no
  need to setup PTAG reg

- icache is still VIPT but only aliasing configs need PTAG setup

So basically this is departure from MMU-v3 which always need vaddr in
line ops registers (DC_IVDL, DC_FLDL, IC_IVIL) but paddr in DC_PTAG,
IC_PTAG respectively.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

d1f317d8

V
ARCv2: MMUv4: TLB programming Model changes · d7a512bf
由 Vineet Gupta 提交于 4月 06, 2015
```
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
```
d7a512bf

openanolis / cloud-kernel 1 年多 前同步成功

openanolis / cloud-kernel
1 年多前同步成功