提交 1b5ca121 编写于 作者: N Nicholas Piggin 提交者: Tejun Heo

percpu: improve generic percpu modify-return implementation

Some architectures require an additional load to find the address of
percpu pointers. In some implementations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.

Work around this by finding the pointer first, then operating on that.

It's also possible to mark things as restrict and those kinds of games,
but that can require larger and arch specific changes.

On powerpc, __this_cpu_inc_return compiles to:

        ld 10,48(13)
        ldx 9,3,10
        addi 9,9,1
        stdx 9,3,10
        ld 9,48(13)
        ldx 3,9,3

With this patch it compiles to:

        ld 10,48(13)
        ldx 9,3,10
        addi 9,9,1
        stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
To: Tejun Heo <tj@kernel.org>
To: Christoph Lameter <cl@linux.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
上级 a67823c1
...@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void); ...@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES #define PER_CPU_DEF_ATTRIBUTES
#endif #endif
#define raw_cpu_generic_read(pcp) \
({ \
*raw_cpu_ptr(&(pcp)); \
})
#define raw_cpu_generic_to_op(pcp, val, op) \ #define raw_cpu_generic_to_op(pcp, val, op) \
do { \ do { \
*raw_cpu_ptr(&(pcp)) op val; \ *raw_cpu_ptr(&(pcp)) op val; \
...@@ -72,34 +77,39 @@ do { \ ...@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \ #define raw_cpu_generic_add_return(pcp, val) \
({ \ ({ \
raw_cpu_add(pcp, val); \ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
raw_cpu_read(pcp); \ \
*__p += val; \
*__p; \
}) })
#define raw_cpu_generic_xchg(pcp, nval) \ #define raw_cpu_generic_xchg(pcp, nval) \
({ \ ({ \
typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \ typeof(pcp) __ret; \
__ret = raw_cpu_read(pcp); \ __ret = *__p; \
raw_cpu_write(pcp, nval); \ *__p = nval; \
__ret; \ __ret; \
}) })
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \ ({ \
typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \ typeof(pcp) __ret; \
__ret = raw_cpu_read(pcp); \ __ret = *__p; \
if (__ret == (oval)) \ if (__ret == (oval)) \
raw_cpu_write(pcp, nval); \ *__p = nval; \
__ret; \ __ret; \
}) })
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \ ({ \
typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \ int __ret = 0; \
if (raw_cpu_read(pcp1) == (oval1) && \ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
raw_cpu_read(pcp2) == (oval2)) { \ *__p1 = nval1; \
raw_cpu_write(pcp1, nval1); \ *__p2 = nval2; \
raw_cpu_write(pcp2, nval2); \
__ret = 1; \ __ret = 1; \
} \ } \
(__ret); \ (__ret); \
...@@ -109,7 +119,7 @@ do { \ ...@@ -109,7 +119,7 @@ do { \
({ \ ({ \
typeof(pcp) __ret; \ typeof(pcp) __ret; \
preempt_disable(); \ preempt_disable(); \
__ret = *this_cpu_ptr(&(pcp)); \ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \ preempt_enable(); \
__ret; \ __ret; \
}) })
...@@ -118,17 +128,17 @@ do { \ ...@@ -118,17 +128,17 @@ do { \
do { \ do { \
unsigned long __flags; \ unsigned long __flags; \
raw_local_irq_save(__flags); \ raw_local_irq_save(__flags); \
*raw_cpu_ptr(&(pcp)) op val; \ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \ raw_local_irq_restore(__flags); \
} while (0) } while (0)
#define this_cpu_generic_add_return(pcp, val) \ #define this_cpu_generic_add_return(pcp, val) \
({ \ ({ \
typeof(pcp) __ret; \ typeof(pcp) __ret; \
unsigned long __flags; \ unsigned long __flags; \
raw_local_irq_save(__flags); \ raw_local_irq_save(__flags); \
raw_cpu_add(pcp, val); \ __ret = raw_cpu_generic_add_return(pcp, val); \
__ret = raw_cpu_read(pcp); \
raw_local_irq_restore(__flags); \ raw_local_irq_restore(__flags); \
__ret; \ __ret; \
}) })
...@@ -138,8 +148,7 @@ do { \ ...@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \ typeof(pcp) __ret; \
unsigned long __flags; \ unsigned long __flags; \
raw_local_irq_save(__flags); \ raw_local_irq_save(__flags); \
__ret = raw_cpu_read(pcp); \ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_cpu_write(pcp, nval); \
raw_local_irq_restore(__flags); \ raw_local_irq_restore(__flags); \
__ret; \ __ret; \
}) })
...@@ -149,9 +158,7 @@ do { \ ...@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \ typeof(pcp) __ret; \
unsigned long __flags; \ unsigned long __flags; \
raw_local_irq_save(__flags); \ raw_local_irq_save(__flags); \
__ret = raw_cpu_read(pcp); \ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
if (__ret == (oval)) \
raw_cpu_write(pcp, nval); \
raw_local_irq_restore(__flags); \ raw_local_irq_restore(__flags); \
__ret; \ __ret; \
}) })
...@@ -168,16 +175,16 @@ do { \ ...@@ -168,16 +175,16 @@ do { \
}) })
#ifndef raw_cpu_read_1 #ifndef raw_cpu_read_1
#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) #define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif #endif
#ifndef raw_cpu_read_2 #ifndef raw_cpu_read_2
#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) #define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif #endif
#ifndef raw_cpu_read_4 #ifndef raw_cpu_read_4
#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) #define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif #endif
#ifndef raw_cpu_read_8 #ifndef raw_cpu_read_8
#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) #define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif #endif
#ifndef raw_cpu_write_1 #ifndef raw_cpu_write_1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册