1. 05 4月, 2018 1 次提交
  2. 04 4月, 2018 1 次提交
    • A
      perf annotate browser: Show extra title line with event information · 6920e285
      Arnaldo Carvalho de Melo 提交于
      So at the top we'll have two lines, like this, from 'perf report':
      
        # perf report --group --ignore-vmlinux
      =====================================================================================================
      Samples: 46  of events 'cycles', 4000 Hz, Event count (approx.): 5154895
      _raw_spin_lock_irqsave  /proc/kcore
      Percent              │      nop
                           │      push   %rbx
        0.00  14.29   0.00 │      pushfq
        9.09   0.00   0.00 │      pop    %rax
        9.09   0.00  20.00 │      nop
                           │      mov    %rax,%rbx
                           │      cli
        4.55   7.14   0.00 │      nop
                           │      xor    %eax,%eax
                           │      mov    $0x1,%edx
                           │      lock   cmpxchg %edx,(%rdi)
       77.27  78.57  70.00 │      test   %eax,%eax
                           │    ↓ jne    2b
                           │      mov    %rbx,%rax
        0.00   0.00  10.00 │      pop    %rbx
                           │    ← retq
                           │2b:   mov    %eax,%esi
                           │    → callq  queued_spin_lock_slowpath
                           │      mov    %rbx,%rax
                           │      pop    %rbx
      Press 'h' for help on│key bindings
      =====================================================================================================
      
       9.09 + 9.09 + 4.55 + 77.27 = 100
      14.29 + 7.14 + 78.57 = 100
      20 + 70 + 10 = 100
      
      We can do the math by using 't' to toggle from 'percent' to nr
      
      =====================================================================================================
      Samples: 46  of events 'cycles', 4000 Hz, Event count (approx.): 5154895
      _raw_spin_lock_irqsave  /proc/kcore
      Period                              │      nop
                                          │      push   %rbx
                0       79273           0 │      pushfq
           190455           0           0 │      pop    %rax
           198038           0        3045 │      nop
                                          │      mov    %rax,%rbx
                                          │      cli
           217233       32562           0 │      nop
                                          │      xor    %eax,%eax
                                          │      mov    $0x1,%edx
                                          │      lock   cmpxchg %edx,(%rdi)
          3421649      979174       28273 │      test   %eax,%eax
                                          │    ↓ jne    2b
                                          │      mov    %rbx,%rax
                0           0        5193 │      pop    %rbx
                                          │    ← retq
                                          │2b:   mov    %eax,%esi
                                          │    → callq  queued_spin_lock_slowpath
                                          │      mov    %rbx,%rax
                                          │      pop    %rbx
      Press 'h' for help on│key bindings
      =====================================================================================================
      
      79273 + 190455 + 198038 + 3045 + 217233 + 32562 + 3421649 + 979174 + 28273 + 5193 = 5154895
      
      Or number of samples:
      
      =====================================================================================================
      ooSamples: 46  of events 'cycles', 4000 Hz, Event count (approx.): 5154895
      _raw_spin_lock_irqsave  /proc/kcore
      Samples              │      nop
                           │      push   %rbx
           0      2      0 │      pushfq
           2      0      0 │      pop    %rax
           2      0      2 │      nop
                           │      mov    %rax,%rbx
                           │      cli
           1      1      0 │      nop
                           │      xor    %eax,%eax
                           │      mov    $0x1,%edx
                           │      lock   cmpxchg %edx,(%rdi)
          17     11      7 │      test   %eax,%eax
                           │    ↓ jne    2b
                           │      mov    %rbx,%rax
           0      0      1 │      pop    %rbx
                           │    ← retq
                           │2b:   mov    %eax,%esi
                           │    → callq  queued_spin_lock_slowpath
                           │      mov    %rbx,%rax
                           │      pop    %rbx
      Press 'h' for help on key bindings
      =====================================================================================================
      
      2 + 2 + 2 + 2 + 1 + 1 + 17 + 11 + 7 + 1 = 46
      Suggested-by: NMartin Liška <mliska@suse.cz>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196935
      Link: https://lkml.kernel.org/n/tip-ezccyxld50wtwyt66np6aomo@git.kernel.orgSigned-off-by: NArnaldo Carvalho de Melo <acme@redhat.com>
      6920e285
  3. 24 3月, 2018 2 次提交
    • A
      perf annotate: Support jumping from one function to another · e4cc91b8
      Arnaldo Carvalho de Melo 提交于
      For instance:
      
        entry_SYSCALL_64  /lib/modules/4.16.0-rc5-00086-gdf09348f/build/vmlinux
          5.50 │     → callq  do_syscall_64
         14.56 │       mov    0x58(%rsp),%rcx
          7.44 │       mov    0x80(%rsp),%r11
          0.32 │       cmp    %rcx,%r11
               │     → jne    swapgs_restore_regs_and_return_to_usermode
          0.32 │       shl    $0x10,%rcx
          0.32 │       sar    $0x10,%rcx
          3.24 │       cmp    %rcx,%r11
               │     → jne    swapgs_restore_regs_and_return_to_usermode
          2.27 │       cmpq   $0x33,0x88(%rsp)
          1.29 │     → jne    swapgs_restore_regs_and_return_to_usermode
               │       mov    0x30(%rsp),%r11
          8.74 │       cmp    %r11,0x90(%rsp)
               │     → jne    swapgs_restore_regs_and_return_to_usermode
          0.32 │       test   $0x10100,%r11
               │     → jne    swapgs_restore_regs_and_return_to_usermode
          0.32 │       cmpq   $0x2b,0xa0(%rsp)
          0.65 │     → jne    swapgs_restore_regs_and_return_to_usermode
      
      It'll behave just like a "call" instruction, i.e. press enter or right
      arrow over one such line and the browser will navigate to the annotated
      disassembly of that function, which when exited, via left arrow or esc,
      will come back to the calling function.
      
      Now to support jump to an offset on a different function...
      Reported-by: NLinus Torvalds <torvalds@linux-foundation.org>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-78o508mqvr8inhj63ddtw7mo@git.kernel.orgSigned-off-by: NArnaldo Carvalho de Melo <acme@redhat.com>
      e4cc91b8
    • A
      perf annotate: Add "_local" to jump/offset validation routines · 2eff0611
      Arnaldo Carvalho de Melo 提交于
      Because they all really check if we can access data structures/visual
      constructs where a "jump" instruction targets code in the same function,
      i.e. things like:
      
        __pthread_mutex_lock  /usr/lib64/libpthread-2.26.so
        1.95 │       mov    __pthread_force_elision,%ecx
             │    ┌──test   %ecx,%ecx
        0.07 │    ├──je     60
             │    │  test   $0x300,%esi
             │    │↓ jne    60
             │    │  or     $0x100,%esi
             │    │  mov    %esi,0x10(%rdi)
             │ 42:│  mov    %esi,%edx
             │    │  lea    0x16(%r8),%rsi
             │    │  mov    %r8,%rdi
             │    │  and    $0x80,%edx
             │    │  add    $0x8,%rsp
             │    │→ jmpq   __lll_lock_elision
             │    │  nop
        0.29 │ 60:└─→and    $0x80,%esi
        0.07 │       mov    $0x1,%edi
        0.29 │       xor    %eax,%eax
        2.53 │       lock   cmpxchg %edi,(%r8)
      
      And not things like that "jmpq __lll_lock_elision", that instead should behave
      like a "call" instruction and "jump" to the disassembly of "___lll_lock_elision".
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Wang Nan <wangnan0@huawei.com>
      Link: https://lkml.kernel.org/n/tip-3cwx39u3h66dfw9xjrlt7ca2@git.kernel.orgSigned-off-by: NArnaldo Carvalho de Melo <acme@redhat.com>
      2eff0611
  4. 21 3月, 2018 29 次提交
  5. 05 3月, 2018 3 次提交
  6. 17 2月, 2018 1 次提交
    • J
      perf report: Fix wrong jump arrow · b40982e8
      Jin Yao 提交于
      When we use perf report interactive annotate view, we can see
      the position of jump arrow is not correct. For example,
      
      1. perf record -b ...
      2. perf report
      3. In interactive mode, select Annotate 'function'
      
      Percent│ IPC Cycle
             │                                if (flag)
        1.37 │0.4┌──   1      ↓ je     82
             │   │                                    x += x / y + y / x;
        0.00 │0.4│  1310        movsd  (%rsp),%xmm0
        0.00 │0.4│   565        movsd  0x8(%rsp),%xmm4
             │0.4│              movsd  0x8(%rsp),%xmm1
             │0.4│              movsd  (%rsp),%xmm3
             │0.4│              divsd  %xmm4,%xmm0
        0.00 │0.4│   579        divsd  %xmm3,%xmm1
             │0.4│              movsd  (%rsp),%xmm2
             │0.4│              addsd  %xmm1,%xmm0
             │0.4│              addsd  %xmm2,%xmm0
        0.00 │0.4│              movsd  %xmm0,(%rsp)
             │   │                    volatile double x = 1212121212, y = 121212;
             │   │
             │   │                    s_randseed = time(0);
             │   │                    srand(s_randseed);
             │   │
             │   │                    for (i = 0; i < 2000000000; i++) {
        1.37 │0.4└─→      82:   sub    $0x1,%ebx
       28.21 │0.48    17      ↑ jne    38
      
      The jump arrow in above example is not correct. It should add the
      width of IPC and Cycle.
      
      With this patch, the result is:
      
      Percent│ IPC Cycle
             │                                if (flag)
        1.37 │0.48     1     ┌──je     82
             │               │                        x += x / y + y / x;
        0.00 │0.48  1310     │  movsd  (%rsp),%xmm0
        0.00 │0.48   565     │  movsd  0x8(%rsp),%xmm4
             │0.48           │  movsd  0x8(%rsp),%xmm1
             │0.48           │  movsd  (%rsp),%xmm3
             │0.48           │  divsd  %xmm4,%xmm0
        0.00 │0.48   579     │  divsd  %xmm3,%xmm1
             │0.48           │  movsd  (%rsp),%xmm2
             │0.48           │  addsd  %xmm1,%xmm0
             │0.48           │  addsd  %xmm2,%xmm0
        0.00 │0.48           │  movsd  %xmm0,(%rsp)
             │               │        volatile double x = 1212121212, y = 121212;
             │               │
             │               │        s_randseed = time(0);
             │               │        srand(s_randseed);
             │               │
             │               │        for (i = 0; i < 2000000000; i++) {
        1.37 │0.48        82:└─→sub    $0x1,%ebx
       28.21 │0.48    17      ↑ jne    38
      
      Committer notes:
      
      Please note that only from LBRv5 (according to Jiri) onwards, i.e. >=
      Skylake is that we'll have the cycles counts in each branch record
      entry, so to see the Cycles and IPC columns, and be able to test this
      patch, one need a capable hardware.
      
      While applying this I first tested it on a Broadwell class machine and
      couldn't get those columns, will add code to the annotate browser to
      warn the user about that, i.e. you have branch records, but no cycles,
      use a more recent hardware to get the cycles and IPC columns.
      Signed-off-by: NJin Yao <yao.jin@linux.intel.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Jin Yao <yao.jin@intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Kan Liang <kan.liang@intel.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lkml.kernel.org/r/1517223473-14750-1-git-send-email-yao.jin@linux.intel.comSigned-off-by: NArnaldo Carvalho de Melo <acme@redhat.com>
      b40982e8
  7. 27 12月, 2017 1 次提交
  8. 17 11月, 2017 2 次提交