提交 f38d999c 编写于 作者: W Will Deacon

ARM: atomics: prefetch the destination word for write prior to strex

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch prefixes our atomic access implementations with pldw
instructions (on CPUs which support them) to try and grab the line in
exclusive state from the start. Only the barrier-less functions are
updated, since memory barriers can limit the usefulness of prefetching
data.
Acked-by: NNicolas Pitre <nico@linaro.org>
Signed-off-by: NWill Deacon <will.deacon@arm.com>
上级 9bb17be0
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define __ASM_ARM_ATOMIC_H #define __ASM_ARM_ATOMIC_H
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/prefetch.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/irqflags.h> #include <linux/irqflags.h>
#include <asm/barrier.h> #include <asm/barrier.h>
...@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v) ...@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
unsigned long tmp; unsigned long tmp;
int result; int result;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic_add\n" __asm__ __volatile__("@ atomic_add\n"
"1: ldrex %0, [%3]\n" "1: ldrex %0, [%3]\n"
" add %0, %0, %4\n" " add %0, %0, %4\n"
...@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v) ...@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
unsigned long tmp; unsigned long tmp;
int result; int result;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic_sub\n" __asm__ __volatile__("@ atomic_sub\n"
"1: ldrex %0, [%3]\n" "1: ldrex %0, [%3]\n"
" sub %0, %0, %4\n" " sub %0, %0, %4\n"
...@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) ...@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
{ {
unsigned long tmp, tmp2; unsigned long tmp, tmp2;
prefetchw(addr);
__asm__ __volatile__("@ atomic_clear_mask\n" __asm__ __volatile__("@ atomic_clear_mask\n"
"1: ldrex %0, [%3]\n" "1: ldrex %0, [%3]\n"
" bic %0, %0, %4\n" " bic %0, %0, %4\n"
...@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i) ...@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
{ {
u64 tmp; u64 tmp;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic64_set\n" __asm__ __volatile__("@ atomic64_set\n"
"1: ldrexd %0, %H0, [%2]\n" "1: ldrexd %0, %H0, [%2]\n"
" strexd %0, %3, %H3, [%2]\n" " strexd %0, %3, %H3, [%2]\n"
...@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) ...@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
u64 result; u64 result;
unsigned long tmp; unsigned long tmp;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic64_add\n" __asm__ __volatile__("@ atomic64_add\n"
"1: ldrexd %0, %H0, [%3]\n" "1: ldrexd %0, %H0, [%3]\n"
" adds %0, %0, %4\n" " adds %0, %0, %4\n"
...@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) ...@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
u64 result; u64 result;
unsigned long tmp; unsigned long tmp;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic64_sub\n" __asm__ __volatile__("@ atomic64_sub\n"
"1: ldrexd %0, %H0, [%3]\n" "1: ldrexd %0, %H0, [%3]\n"
" subs %0, %0, %4\n" " subs %0, %0, %4\n"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册