• S
    rcu: Use __this_cpu_read() instead of per_cpu_ptr() · d860d403
    Shan Wei 提交于
    The __this_cpu_read() function produces better code than does
    per_cpu_ptr() on both ARM and x86.  For example, gcc (Ubuntu/Linaro
    4.7.3-12ubuntu1) 4.7.3 produces the following:
    
    ARMv7 per_cpu_ptr():
    
    force_quiescent_state:
        mov    r3, sp    @,
        bic    r1, r3, #8128    @ tmp171,,
        ldr    r2, .L98    @ tmp169,
        bic    r1, r1, #63    @ tmp170, tmp171,
        ldr    r3, [r0, #220]    @ __ptr, rsp_6(D)->rda
        ldr    r1, [r1, #20]    @ D.35903_68->cpu, D.35903_68->cpu
        mov    r6, r0    @ rsp, rsp
        ldr    r2, [r2, r1, asl #2]    @ tmp173, __per_cpu_offset
        add    r3, r3, r2    @ tmp175, __ptr, tmp173
        ldr    r5, [r3, #12]    @ rnp_old, D.29162_13->mynode
    
    ARMv7 __this_cpu_read():
    
    force_quiescent_state:
        ldr    r3, [r0, #220]    @ rsp_7(D)->rda, rsp_7(D)->rda
        mov    r6, r0    @ rsp, rsp
        add    r3, r3, #12    @ __ptr, rsp_7(D)->rda,
        ldr    r5, [r2, r3]    @ rnp_old, *D.29176_13
    
    Using gcc 4.8.2:
    
    x86_64 per_cpu_ptr():
    
        movl %gs:cpu_number,%edx    # cpu_number, pscr_ret__
        movslq    %edx, %rdx    # pscr_ret__, pscr_ret__
        movq    __per_cpu_offset(,%rdx,8), %rdx    # __per_cpu_offset, tmp93
        movq    %rdi, %r13    # rsp, rsp
        movq    1000(%rdi), %rax    # rsp_9(D)->rda, __ptr
        movq    24(%rdx,%rax), %r12    # _15->mynode, rnp_old
    
    x86_64 __this_cpu_read():
    
        movq    %rdi, %r13    # rsp, rsp
        movq    1000(%rdi), %rax    # rsp_9(D)->rda, rsp_9(D)->rda
        movq %gs:24(%rax),%r12    # _10->mynode, rnp_old
    
    Because this change produces significant benefits for these two very
    diverse architectures, this commit makes this change.
    Signed-off-by: NShan Wei <davidshan@tencent.com>
    Acked-by: NChristoph Lameter <cl@linux.com>
    Signed-off-by: NPranith Kumar <bobby.prani@gmail.com>
    Signed-off-by: NPaul E. McKenney <paulmck@linux.vnet.ibm.com>
    Reviewed-by: NJosh Triplett <josh@joshtriplett.org>
    Reviewed-by: NLai Jiangshan <laijs@cn.fujitsu.com>
    d860d403
tree.c 116.2 KB