1. 14 10月, 2020 2 次提交
  2. 05 9月, 2020 1 次提交
    • A
      perf annotate: Allow configuring the 'disassembler_style' knob via 'perf config' · bbe54468
      Arnaldo Carvalho de Melo 提交于
        # perf annotate --stdio2 acpi_processor_ffh_cstate_enter > default
        # perf config annotate.disassembler_style=intel
        # perf config annotate.disassembler_style
        annotate.disassembler_style=intel
        # perf annotate --stdio2 acpi_processor_ffh_cstate_enter > intel
        # diff -u default intel
        --- default	2020-09-04 13:09:26.019205732 -0300
        +++ intel	2020-09-04 13:09:52.823795081 -0300
        @@ -1,42 +1,42 @@
         Samples: 1K of event 'cycles', 4000 Hz, Event count (approx.): 990065316, [percent: local period]
         acpi_processor_ffh_cstate_enter() /lib/modules/5.9.0-rc3/build/vmlinux
        -Percent     → callq   __fentry__
        -              mov     cpu_number,%edx
        -              mov     %edx,%edx
        -              mov     cpu_cstate_entry,%rax
        -              add     -0x7dbe9700(,%rdx,8),%rax
        -              movzbl  0x9(%rdi),%edx
        -              mov     0x4(%rax,%rdx,8),%edi
        -              mov     (%rax,%rdx,8),%esi
        -            → jmpq    137ccc6
        -        2d: → jmpq    137ccd8
        +Percent     → call    __fentry__
        +              mov     edx,DWORD PTR gs:[rip+0x7e541d74]
        +              mov     edx,edx
        +              mov     rax,QWORD PTR [rip+0x152b8fb]
        +              add     rax,QWORD PTR [rdx*8-0x7dbe9700]
        +              movzx   edx,BYTE PTR [rdi+0x9]
        +              mov     edi,DWORD PTR [rax+rdx*8+0x4]
        +              mov     esi,DWORD PTR [rax+rdx*8]
        +            → jmp     137ccc6
        +        2d: → jmp     137ccd8
                       mfence
        -              mov     %gs:0x17bc0,%rax
        -              clflush (%rax)
        +              mov     rax,QWORD PTR gs:0x17bc0
        +              clflush BYTE PTR [rax]
                       mfence
        -              xor     %edx,%edx
        -              mov     %rdx,%rcx
        -              mov     %gs:0x17bc0,%rax
        -  0.00        monitor %rax,%ecx,%edx
        -              mov     (%rax),%rax
        -              test    $0x8,%al
        +              xor     edx,edx
        +              mov     rcx,rdx
        +              mov     rax,QWORD PTR gs:0x17bc0
        +  0.00        monitor
        +              mov     rax,QWORD PTR [rax]
        +              test    al,0x8
                     ↓ jne     71
        -            ↓ jmpq    68
        -              verw    0x538b08(%rip)        # ffffffff82008150 <ds.0>
        -        68:   mov     %rsi,%rax
        -              mov     %rdi,%rcx
        -100.00        mwait   %eax,%ecx
        -        71:   mov     %gs:0x17bc0,%rax
        -              lock    andb    $0xdf,0x2(%rax)
        -              lock    addl    $0x0,-0x4(%rsp)
        -              mov     (%rax),%rax
        -              test    $0x8,%al
        +            ↓ jmp     68
        +              verw    WORD PTR [rip+0x538b08]        # ffffffff82008150 <ds.0>
        +        68:   mov     rax,rsi
        +              mov     rcx,rdi
        +100.00        mwait
        +        71:   mov     rax,QWORD PTR gs:0x17bc0
        +              lock    and     BYTE PTR [rax+0x2],0xdf
        +              lock    add     DWORD PTR [rsp-0x4],0x0
        +              mov     rax,QWORD PTR [rax]
        +              test    al,0x8
                     ↓ je      97
        -              andl    $0x7fffffff,__preempt_count
        -        97: ← retq
        -              mov     %gs:0x17bc0,%rax
        -              lock    orb     $0x20,0x2(%rax)
        -              mov     (%rax),%rax
        -              test    $0x8,%al
        +              and     DWORD PTR gs:[rip+0x7e548509],0x7fffffff
        +        97:   ret
        +              mov     rax,QWORD PTR gs:0x17bc0
        +              lock    or      BYTE PTR [rax+0x2],0x20
        +              mov     rax,QWORD PTR [rax]
        +              test    al,0x8
                     ↑ jne     71
        -            ↑ jmpq    2d
        +            ↑ jmp     2d
        #
      Requested-by: Matt P. Dziubinski <matdzb@gmail.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      bbe54468
  3. 09 7月, 2020 1 次提交
    • N
      perf annotate: Fix non-null terminated buffer returned by readlink() · b39730a6
      Numfor Mbiziwo-Tiapo 提交于
      Our local MSAN (Memory Sanitizer) build of perf throws a warning that
      comes from the "dso__disassemble_filename" function in
      "tools/perf/util/annotate.c" when running perf record.
      
      The warning stems from the call to readlink, in which "build_id_path"
      was being read into "linkname". Since readlink does not null terminate,
      an uninitialized memory access would later occur when "linkname" is
      passed into the strstr function. This is simply fixed by
      null-terminating "linkname" after the call to readlink.
      
      To reproduce this warning, build perf by running:
      
        $ make -C tools/perf CLANG=1 CC=clang EXTRA_CFLAGS="-fsanitize=memory -fsanitize-memory-track-origins"
      
      (Additionally, llvm might have to be installed and clang might have to
      be specified as the compiler - export CC=/usr/bin/clang)
      
      Then running:
      
        tools/perf/perf record -o - ls / | tools/perf/perf --no-pager annotate -i - --stdio
      
      Please see the cover letter for why false positive warnings may be
      generated.
      Signed-off-by: Numfor Mbiziwo-Tiapo <nums@google.com>
      Acked-by: Ian Rogers <irogers@google.com>
      Acked-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Mark Drayton <mbd@fb.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Song Liu <songliubraving@fb.com>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20190729205750.193289-1-nums@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      b39730a6
  4. 02 6月, 2020 1 次提交
  5. 06 5月, 2020 4 次提交
  6. 16 4月, 2020 1 次提交
    • J
      perf annotate: Add basic support for bpf_image · 3c29d448
      Jiri Olsa 提交于
      Add the DSO_BINARY_TYPE__BPF_IMAGE dso binary type to recognize BPF
      images that carry trampoline or dispatcher.
      
      Upcoming patches will add support to read the image data, store it
      within the BPF feature in perf.data and display it for annotation
      purposes.
      
      Currently we only display following message:
      
        # ./perf annotate bpf_trampoline_24456 --stdio
         Percent |      Source code & Disassembly of . for cycles (504  ...
        --------------------------------------------------------------- ...
                 :       to be implemented
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Acked-by: Song Liu <songliubraving@fb.com>
      Cc: Alexei Starovoitov <ast@kernel.org>
      Cc: Andrii Nakryiko <andriin@fb.com>
      Cc: Björn Töpel <bjorn.topel@intel.com>
      Cc: Daniel Borkmann <daniel@iogearbox.net>
      Cc: David S. Miller <davem@redhat.com>
      Cc: Jakub Kicinski <kuba@kernel.org>
      Cc: Jesper Dangaard Brouer <hawk@kernel.org>
      Cc: John Fastabend <john.fastabend@gmail.com>
      Cc: Martin KaFai Lau <kafai@fb.com>
      Cc: Yonghong Song <yhs@fb.com>
      Link: https://lore.kernel.org/bpf/20200312195610.346362-16-jolsa@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      3c29d448
  7. 04 3月, 2020 1 次提交
  8. 27 2月, 2020 7 次提交
    • R
      perf annotate: Fix segfault with source toggle · e0560ba6
      Ravi Bangoria 提交于
      While rendering annotate browser from perf report tui, we keep track
      of total number of lines(asm + source) in annotation->nr_entries and
      total number of asm lines in annotation->nr_asm_entries. But we don't
      reset them before starting. Thus if user annotates same function
      multiple times, we restart incrementing these fields with old values.
      
      This causes a segfault when user tries to toggle source code after
      annotating same function multiple times. Fix it.
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Acked-by: Jiri Olsa <jolsa@redhat.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Link: http://lore.kernel.org/lkml/20200204045233.474937-5-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      e0560ba6
    • R
      perf annotate: Align struct annotate_args · d3c03147
      Ravi Bangoria 提交于
      Align fields of struct annotate_args.
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Acked-by: Jiri Olsa <jolsa@redhat.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Link: http://lore.kernel.org/lkml/20200204045233.474937-4-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      d3c03147
    • R
      perf annotate: Simplify disasm_line allocation and freeing code · 2316f861
      Ravi Bangoria 提交于
      We are allocating disasm_line object in annotation_line__new() instead
      of disasm_line__new(). Similarly annotation_line__delete() is actually
      freeing disasm_line object as well. This complexity is because of
      privsize.  But we don't need privsize anymore so get rid of privsize and
      simplify disasm_line allocation and freeing code.
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Acked-by: Jiri Olsa <jolsa@redhat.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Link: http://lore.kernel.org/lkml/20200204045233.474937-3-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      2316f861
    • R
      perf annotate: Remove privsize from symbol__annotate() args · e0ad4d68
      Ravi Bangoria 提交于
      privsize is passed as 0 from all the symbol__annotate() callers.
      Remove it from argument list.
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Acked-by: Jiri Olsa <jolsa@redhat.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Link: http://lore.kernel.org/lkml/20200204045233.474937-2-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      e0ad4d68
    • R
      perf annotate: Make perf config effective · 7384083b
      Ravi Bangoria 提交于
      perf default config set by user in [annotate] section is totally ignored
      by annotate code. Fix it.
      
      Before:
      
        $ ./perf config
        annotate.hide_src_code=true
        annotate.show_nr_jumps=true
        annotate.show_nr_samples=true
      
        $ ./perf annotate shash
               │    unsigned h = 0;
               │      movl   $0x0,-0xc(%rbp)
               │    while (*s)
               │    ↓ jmp    44
               │    h = 65599 * h + *s++;
         11.33 │24:   mov    -0xc(%rbp),%eax
         43.50 │      imul   $0x1003f,%eax,%ecx
               │      mov    -0x18(%rbp),%rax
      
      After:
      
               │        movl   $0x0,-0xc(%rbp)
               │      ↓ jmp    44
             1 │1 24:   mov    -0xc(%rbp),%eax
             4 │        imul   $0x1003f,%eax,%ecx
               │        mov    -0x18(%rbp),%rax
      
      Note that we have removed show_nr_samples and show_total_period from
      annotation_options because they are not used. Instead of them we use
      symbol_conf.show_nr_samples and symbol_conf.show_total_period.
      
      Committer testing:
      
      Using 'perf annotate --stdio2' to use the TUI rendering but emitting the output to stdio:
      
        # perf config
        #
        # perf config annotate.hide_src_code=true
        # perf config
        annotate.hide_src_code=true
        #
        # perf config annotate.show_nr_jumps=true
        # perf config annotate.show_nr_samples=true
        # perf config
        annotate.hide_src_code=true
        annotate.show_nr_jumps=true
        annotate.show_nr_samples=true
        #
        #
      
      Before:
      
        # perf annotate --stdio2 ObjectInstance::weak_pointer_was_finalized
        Samples: 1  of event 'cycles', 4000 Hz, Event count (approx.): 830873, [percent: local period]
        ObjectInstance::weak_pointer_was_finalized() /usr/lib64/libgjs.so.0.0.0
        Percent
                    00000000000609f0 <ObjectInstance::weak_pointer_was_finalized()@@base>:
                      endbr64
                      cmpq    $0x0,0x20(%rdi)
                    ↓ je      10
                      xor     %eax,%eax
                    ← retq
                      xchg    %ax,%ax
        100.00  10:   push    %rbp
                      cmpq    $0x0,0x18(%rdi)
                      mov     %rdi,%rbp
                    ↓ jne     20
                1b:   xor     %eax,%eax
                      pop     %rbp
                    ← retq
                      nop
                20:   lea     0x18(%rdi),%rdi
                    → callq   JS_UpdateWeakPointerAfterGC(JS::Heap<JSObject*
                      cmpq    $0x0,0x18(%rbp)
                    ↑ jne     1b
                      mov     %rbp,%rdi
                    → callq   ObjectBase::jsobj_addr() const@plt
                      mov     $0x1,%eax
                      pop     %rbp
                    ← retq
        #
      
      After:
      
        # perf annotate --stdio2 ObjectInstance::weak_pointer_was_finalized 2> /dev/null
        Samples: 1  of event 'cycles', 4000 Hz, Event count (approx.): 830873, [percent: local period]
        ObjectInstance::weak_pointer_was_finalized() /usr/lib64/libgjs.so.0.0.0
        Samples       endbr64
                      cmpq    $0x0,0x20(%rdi)
                    ↓ je      10
                      xor     %eax,%eax
                    ← retq
                      xchg    %ax,%ax
           1  1 10:   push    %rbp
                      cmpq    $0x0,0x18(%rdi)
                      mov     %rdi,%rbp
                    ↓ jne     20
              1 1b:   xor     %eax,%eax
                      pop     %rbp
                    ← retq
                      nop
              1 20:   lea     0x18(%rdi),%rdi
                    → callq   JS_UpdateWeakPointerAfterGC(JS::Heap<JSObject*
                      cmpq    $0x0,0x18(%rbp)
                    ↑ jne     1b
                      mov     %rbp,%rdi
                    → callq   ObjectBase::jsobj_addr() const@plt
                      mov     $0x1,%eax
                      pop     %rbp
                    ← retq
        #
        # perf config annotate.show_nr_jumps
        annotate.show_nr_jumps=true
        # perf config annotate.show_nr_jumps=false
        # perf config annotate.show_nr_jumps
        annotate.show_nr_jumps=false
        #
        # perf annotate --stdio2 ObjectInstance::weak_pointer_was_finalized 2> /dev/null
        Samples: 1  of event 'cycles', 4000 Hz, Event count (approx.): 830873, [percent: local period]
        ObjectInstance::weak_pointer_was_finalized() /usr/lib64/libgjs.so.0.0.0
        Samples       endbr64
                      cmpq    $0x0,0x20(%rdi)
                    ↓ je      10
                      xor     %eax,%eax
                    ← retq
                      xchg    %ax,%ax
             1  10:   push    %rbp
                      cmpq    $0x0,0x18(%rdi)
                      mov     %rdi,%rbp
                    ↓ jne     20
                1b:   xor     %eax,%eax
                      pop     %rbp
                    ← retq
                      nop
                20:   lea     0x18(%rdi),%rdi
                    → callq   JS_UpdateWeakPointerAfterGC(JS::Heap<JSObject*
                      cmpq    $0x0,0x18(%rbp)
                    ↑ jne     1b
                      mov     %rbp,%rdi
                    → callq   ObjectBase::jsobj_addr() const@plt
                      mov     $0x1,%eax
                      pop     %rbp
                    ← retq
        #
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
      Cc: Changbin Du <changbin.du@intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Cc: Taeung Song <treeze.taeung@gmail.com>
      Cc: Thomas Richter <tmricht@linux.ibm.com>
      Cc: Yisheng Xie <xieyisheng1@huawei.com>
      Link: http://lore.kernel.org/lkml/20200213064306.160480-6-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      7384083b
    • R
      perf annotate: Fix --show-nr-samples for tui/stdio2 · 46ccb442
      Ravi Bangoria 提交于
      perf annotate --show-nr-samples does not really show number of samples.
      
      The reason is we have two separate variables for the same purpose.
      
      One is in symbol_conf.show_nr_samples and another is
      annotation_options.show_nr_samples.
      
      We save command line option in symbol_conf.show_nr_samples but uses
      annotation_option.show_nr_samples while rendering tui/stdio2 browser.
      
      Though, we copy symbol_conf.show_nr_samples to
      annotation__default_options.show_nr_samples but that is not really
      effective as we don't use annotation__default_options once we copy
      default options to dynamic variable annotate.opts in cmd_annotate().
      
      Instead of all these complication, keep only one variable and use it all
      over. symbol_conf.show_nr_samples is used by perf report/top as well. So
      let's kill annotation_options.show_nr_samples.
      
      On a side note, I've kept annotation_options.show_nr_samples definition
      because it's still used by perf-config code. Follow up patch to fix
      perf-config for annotate will remove annotation_options.show_nr_samples.
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
      Cc: Changbin Du <changbin.du@intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Cc: Taeung Song <treeze.taeung@gmail.com>
      Cc: Thomas Richter <tmricht@linux.ibm.com>
      Cc: Yisheng Xie <xieyisheng1@huawei.com>
      Link: http://lore.kernel.org/lkml/20200213064306.160480-4-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      46ccb442
    • R
      perf annotate: Fix --show-total-period for tui/stdio2 · 68aac855
      Ravi Bangoria 提交于
      perf annotate --show-total-period does not really show total period.
      
      The reason is we have two separate variables for the same purpose.
      
      One is in symbol_conf.show_total_period and another is
      annotation_options.show_total_period.
      
      We save command line option in symbol_conf.show_total_period but uses
      annotation_option.show_total_period while rendering tui/stdio2 browser.
      
      Though, we copy symbol_conf.show_total_period to
      annotation__default_options.show_total_period but that is not really
      effective as we don't use annotation__default_options once we copy
      default options to dynamic variable annotate.opts in cmd_annotate().
      
      Instead of all these complication, keep only one variable and use it all
      over. symbol_conf.show_total_period is used by perf report/top as well.
      So let's kill annotation_options.show_total_period.
      
      On a side note, I've kept annotation_options.show_total_period
      definition because it's still used by perf-config code. Follow up patch
      to fix perf-config for annotate will remove
      annotation_options.show_total_period.
      Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
      Cc: Changbin Du <changbin.du@intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Song Liu <songliubraving@fb.com>
      Cc: Taeung Song <treeze.taeung@gmail.com>
      Cc: Thomas Richter <tmricht@linux.ibm.com>
      Cc: Yisheng Xie <xieyisheng1@huawei.com>
      Link: http://lore.kernel.org/lkml/20200213064306.160480-3-ravi.bangoria@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      68aac855
  9. 14 1月, 2020 1 次提交
    • A
      perf tools: Support --prefix/--prefix-strip · 3b0b16bf
      Andi Kleen 提交于
      The objdump utility has useful --prefix / --prefix-strip options to
      allow changing source code file names hardcoded into executables' debug
      info. Add options to 'perf report', 'perf top' and 'perf annotate',
      which are then passed to objdump.
      
        $ mkdir foo
        $ echo 'main() { for (;;); }' > foo/foo.c
        $ gcc -g foo/foo.c
        foo/foo.c:1:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
            1 | main() { for (;;); }
              | ^~~~
        $ perf record ./a.out
        ^C[ perf record: Woken up 1 times to write data ]
        [ perf record: Captured and wrote 0.230 MB perf.data (5721 samples) ]
        $ mv foo bar
        $ perf annotate
        <does not show source code>
        $ perf annotate --prefix=/home/ak/lsrc/git/bar --prefix-strip=5
        <does show source code>
      Signed-off-by: Andi Kleen <ak@linux.intel.com>
      Tested-by: Jiri Olsa <jolsa@redhat.com>
      LPU-Reference: 20200107210444.214071-1-andi@firstfloor.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      3b0b16bf
  10. 26 11月, 2019 3 次提交
  11. 12 11月, 2019 4 次提交
  12. 07 11月, 2019 1 次提交
  13. 15 10月, 2019 6 次提交
  14. 11 10月, 2019 1 次提交
    • J
      perf diff: Report noisy for cycles diff · cebf7d51
      Jin Yao 提交于
      This patch prints the stddev and hist for the cycles diff of program
      block. It can help us to understand if the cycles is noisy or not.
      
      This patch is inspired by Andi Kleen's patch:
      
        https://lwn.net/Articles/600471/
      
      We create new option '--cycles-hist'.
      
      Example:
      
        perf record -b ./div
        perf record -b ./div
        perf diff -c cycles
      
        # Baseline                                [Program Block Range] Cycles Diff  Shared Object      Symbol
        # ........  .......................................................... ....  .................  ............................
        #
            46.72%                                      [div.c:40 -> div.c:40]    0  div                [.] main
            46.72%                                      [div.c:42 -> div.c:44]    0  div                [.] main
            46.72%                                      [div.c:42 -> div.c:39]    0  div                [.] main
            20.54%                          [random_r.c:357 -> random_r.c:394]    1  libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:357 -> random_r.c:380]    0  libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:388]    0  libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:391]    0  libc-2.27.so       [.] __random_r
            17.04%                              [random.c:288 -> random.c:291]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:291 -> random.c:291]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:293 -> random.c:293]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:298 -> random.c:298]    0  libc-2.27.so       [.] __random
             8.40%                                      [div.c:22 -> div.c:25]    0  div                [.] compute_flag
             8.40%                                      [div.c:27 -> div.c:28]    0  div                [.] compute_flag
             5.14%                                    [rand.c:26 -> rand.c:27]    0  libc-2.27.so       [.] rand
             5.14%                                    [rand.c:28 -> rand.c:28]    0  libc-2.27.so       [.] rand
             2.15%                                  [rand@plt+0 -> rand@plt+0]    0  div                [.] rand@plt
             0.00%                                                                   [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
             0.00%                                [do_mmap+714 -> do_mmap+732]  -10  [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+737 -> do_mmap+765]    1  [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+262 -> do_mmap+299]    0  [kernel.kallsyms]  [k] do_mmap
             0.00%  [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]    7  [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
             0.00%            [native_sched_clock+0 -> native_sched_clock+119]   -1  [kernel.kallsyms]  [k] native_sched_clock
             0.00%                 [native_write_msr+0 -> native_write_msr+16]  -13  [kernel.kallsyms]  [k] native_write_msr
      
      When we enable the option '--cycles-hist', the output is
      
        perf diff -c cycles --cycles-hist
      
        # Baseline                                [Program Block Range] Cycles Diff        stddev/Hist  Shared Object      Symbol
        # ........  .......................................................... ....  .................  .................  ............................
        #
            46.72%                                      [div.c:40 -> div.c:40]    0  ± 37.8% ▁█▁▁██▁█   div                [.] main
            46.72%                                      [div.c:42 -> div.c:44]    0  ± 49.4% ▁▁▂█▂▂▂▂   div                [.] main
            46.72%                                      [div.c:42 -> div.c:39]    0  ± 24.1% ▃█▂▄▁▃▂▁   div                [.] main
            20.54%                          [random_r.c:357 -> random_r.c:394]    1  ± 33.5% ▅▂▁█▃▁▂▁   libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:357 -> random_r.c:380]    0  ± 39.4% ▁▁█▁██▅▁   libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:388]    0                     libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:391]    0  ± 41.2% ▁▃▁▂█▄▃▁   libc-2.27.so       [.] __random_r
            17.04%                              [random.c:288 -> random.c:291]    0  ± 48.8% ▁▁▁▁███▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:291 -> random.c:291]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:293 -> random.c:293]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0                     libc-2.27.so       [.] __random
            17.04%                              [random.c:298 -> random.c:298]    0  ± 75.6% ▃█▁▁▁▁▁▁   libc-2.27.so       [.] __random
             8.40%                                      [div.c:22 -> div.c:25]    0  ± 42.1% ▁▃▁▁███▁   div                [.] compute_flag
             8.40%                                      [div.c:27 -> div.c:28]    0  ± 41.8% ██▁▁▄▁▁▄   div                [.] compute_flag
             5.14%                                    [rand.c:26 -> rand.c:27]    0  ± 37.8% ▁▁▁████▁   libc-2.27.so       [.] rand
             5.14%                                    [rand.c:28 -> rand.c:28]    0                     libc-2.27.so       [.] rand
             2.15%                                  [rand@plt+0 -> rand@plt+0]    0                     div                [.] rand@plt
             0.00%                                                                                      [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
             0.00%                                [do_mmap+714 -> do_mmap+732]  -10                     [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+737 -> do_mmap+765]    1                     [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+262 -> do_mmap+299]    0                     [kernel.kallsyms]  [k] do_mmap
             0.00%  [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]    7                     [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
             0.00%            [native_sched_clock+0 -> native_sched_clock+119]   -1  ± 38.5% ▄█▁        [kernel.kallsyms]  [k] native_sched_clock
             0.00%                 [native_write_msr+0 -> native_write_msr+16]  -13  ± 47.1% ▁█▇▃▁▁     [kernel.kallsyms]  [k] native_write_msr
      
       v8:
       ---
       Rebase to perf/core branch
      
       v7:
       ---
       1. v6 got Jiri's ACK.
       2. Rebase to latest perf/core branch.
      
       v6:
       ---
       1. Jiri provides better code for using data__hpp_register() in ui_init().
          Use this code in v6.
      
       v5:
       ---
       1. Refine the use of data__hpp_register() in ui_init() according to
          Jiri's suggestion.
      
       v4:
       ---
       1. Rename the new option from '--noisy' to '--cycles-hist'
       2. Remove the option '-n'.
       3. Only update the spark value and stats when '--cycles-hist' is enabled.
       4. Remove the code of printing '..'.
      
       v3:
       ---
       1. Move the histogram to a separate column
       2. Move the svals[] out of struct stats
      
       v2:
       ---
       Jiri got a compile error,
      
        CC       builtin-diff.o
        builtin-diff.c: In function ‘compute_cycles_diff’:
        builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value]
        712 |          labs(pair->block_info->cycles_spark[i] -
            |          ^~~~
      
       Because the result of u64 - u64 is still u64. Now we change the type of
       cycles_spark[] to s64.
      Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
      Acked-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      cebf7d51
  15. 01 10月, 2019 6 次提交