• N
    fs: use fast counters for vfs caches · 3e880fb5
    Nick Piggin 提交于
    The percpu_counter library generates quite nasty code, so unless you need
    to dynamically allocate counters or take a fast approximate value, a
    simple per-cpu set of counters is much better.
    
    The percpu_counter can never be made to work as well, because it has an
    indirection from the pointer to percpu memory, and it can't use the direct
    this_cpu_inc interfaces because it doesn't use static PER_CPU data, so
    its code will always be worse.
    
    In the fastpath, it is the difference between this:
    
            incl %gs:nr_dentry      # nr_dentry
    
    and this:
    
            movl    percpu_counter_batch(%rip), %edx        # percpu_counter_batch,
            movl    $1, %esi        #,
            movq    $nr_dentry, %rdi        #,
            call    __percpu_counter_add    # (plus I clobber registers)
    
    __percpu_counter_add:
            pushq   %rbp    #
            movq    %rsp, %rbp      #,
            subq    $32, %rsp       #,
            movq    %rbx, -24(%rbp) #,
            movq    %r12, -16(%rbp) #,
            movq    %r13, -8(%rbp)  #,
            movq    %rdi, %rbx      # fbc, fbc
    #APP
    # 216 "/home/npiggin/usr/src/linux-2.6/arch/x86/include/asm/thread_info.h" 1
            movq %gs:kernel_stack,%rax      #, pfo_ret__
    # 0 "" 2
    #NO_APP
            incl    -8124(%rax)     # <variable>.preempt_count
            movq    32(%rdi), %r12  # <variable>.counters, tcp_ptr__
    #APP
    # 78 "lib/percpu_counter.c" 1
            add %gs:this_cpu_off, %r12      # this_cpu_off, tcp_ptr__
    # 0 "" 2
    #NO_APP
            movslq  (%r12),%r13     #* tcp_ptr__, tmp73
            movslq  %edx,%rax       # batch, batch
            addq    %rsi, %r13      # amount, count
            cmpq    %rax, %r13      # batch, count
            jge     .L27    #,
            negl    %edx    # tmp76
            movslq  %edx,%rdx       # tmp76, tmp77
            cmpq    %rdx, %r13      # tmp77, count
            jg      .L28    #,
    .L27:
            movq    %rbx, %rdi      # fbc,
            call    _raw_spin_lock  #
            addq    %r13, 8(%rbx)   # count, <variable>.count
            movq    %rbx, %rdi      # fbc,
            movl    $0, (%r12)      #,* tcp_ptr__
            call    _raw_spin_unlock        #
    .L29:
    #APP
    # 216 "/home/npiggin/usr/src/linux-2.6/arch/x86/include/asm/thread_info.h" 1
            movq %gs:kernel_stack,%rax      #, pfo_ret__
    # 0 "" 2
    #NO_APP
            decl    -8124(%rax)     # <variable>.preempt_count
            movq    -8136(%rax), %rax       #, D.14625
            testb   $8, %al #, D.14625
            jne     .L32    #,
    .L31:
            movq    -24(%rbp), %rbx #,
            movq    -16(%rbp), %r12 #,
            movq    -8(%rbp), %r13  #,
            leave
            ret
            .p2align 4,,10
            .p2align 3
    .L28:
            movl    %r13d, (%r12)   # count,*
            jmp     .L29    #
    .L32:
            call    preempt_schedule        #
            .p2align 4,,6
            jmp     .L31    #
            .size   __percpu_counter_add, .-__percpu_counter_add
            .p2align 4,,15
    Signed-off-by: Nick Piggin <npiggin@kernel.dk>
    3e880fb5
dcache.c 61.9 KB