1. 11 10月, 2019 1 次提交
    • J
      perf diff: Report noisy for cycles diff · cebf7d51
      Jin Yao 提交于
      This patch prints the stddev and histogram for the cycles diff of each
      program block. It can help us understand whether the cycles are noisy.
      
      This patch is inspired by Andi Kleen's patch:
      
        https://lwn.net/Articles/600471/
      
      We create new option '--cycles-hist'.
      
      Example:
      
        perf record -b ./div
        perf record -b ./div
        perf diff -c cycles
      
        # Baseline                                [Program Block Range] Cycles Diff  Shared Object      Symbol
        # ........  .......................................................... ....  .................  ............................
        #
            46.72%                                      [div.c:40 -> div.c:40]    0  div                [.] main
            46.72%                                      [div.c:42 -> div.c:44]    0  div                [.] main
            46.72%                                      [div.c:42 -> div.c:39]    0  div                [.] main
            20.54%                          [random_r.c:357 -> random_r.c:394]    1  libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:357 -> random_r.c:380]    0  libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:388]    0  libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:391]    0  libc-2.27.so       [.] __random_r
            17.04%                              [random.c:288 -> random.c:291]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:291 -> random.c:291]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:293 -> random.c:293]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0  libc-2.27.so       [.] __random
            17.04%                              [random.c:298 -> random.c:298]    0  libc-2.27.so       [.] __random
             8.40%                                      [div.c:22 -> div.c:25]    0  div                [.] compute_flag
             8.40%                                      [div.c:27 -> div.c:28]    0  div                [.] compute_flag
             5.14%                                    [rand.c:26 -> rand.c:27]    0  libc-2.27.so       [.] rand
             5.14%                                    [rand.c:28 -> rand.c:28]    0  libc-2.27.so       [.] rand
             2.15%                                  [rand@plt+0 -> rand@plt+0]    0  div                [.] rand@plt
             0.00%                                                                   [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
             0.00%                                [do_mmap+714 -> do_mmap+732]  -10  [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+737 -> do_mmap+765]    1  [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+262 -> do_mmap+299]    0  [kernel.kallsyms]  [k] do_mmap
             0.00%  [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]    7  [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
             0.00%            [native_sched_clock+0 -> native_sched_clock+119]   -1  [kernel.kallsyms]  [k] native_sched_clock
             0.00%                 [native_write_msr+0 -> native_write_msr+16]  -13  [kernel.kallsyms]  [k] native_write_msr
      
      When we enable the option '--cycles-hist', the output is
      
        perf diff -c cycles --cycles-hist
      
        # Baseline                                [Program Block Range] Cycles Diff        stddev/Hist  Shared Object      Symbol
        # ........  .......................................................... ....  .................  .................  ............................
        #
            46.72%                                      [div.c:40 -> div.c:40]    0  ± 37.8% ▁█▁▁██▁█   div                [.] main
            46.72%                                      [div.c:42 -> div.c:44]    0  ± 49.4% ▁▁▂█▂▂▂▂   div                [.] main
            46.72%                                      [div.c:42 -> div.c:39]    0  ± 24.1% ▃█▂▄▁▃▂▁   div                [.] main
            20.54%                          [random_r.c:357 -> random_r.c:394]    1  ± 33.5% ▅▂▁█▃▁▂▁   libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:357 -> random_r.c:380]    0  ± 39.4% ▁▁█▁██▅▁   libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:388]    0                     libc-2.27.so       [.] __random_r
            20.54%                          [random_r.c:388 -> random_r.c:391]    0  ± 41.2% ▁▃▁▂█▄▃▁   libc-2.27.so       [.] __random_r
            17.04%                              [random.c:288 -> random.c:291]    0  ± 48.8% ▁▁▁▁███▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:291 -> random.c:291]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:293 -> random.c:293]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0  ±100.0% ▁█▁▁▁▁▁▁   libc-2.27.so       [.] __random
            17.04%                              [random.c:295 -> random.c:295]    0                     libc-2.27.so       [.] __random
            17.04%                              [random.c:298 -> random.c:298]    0  ± 75.6% ▃█▁▁▁▁▁▁   libc-2.27.so       [.] __random
             8.40%                                      [div.c:22 -> div.c:25]    0  ± 42.1% ▁▃▁▁███▁   div                [.] compute_flag
             8.40%                                      [div.c:27 -> div.c:28]    0  ± 41.8% ██▁▁▄▁▁▄   div                [.] compute_flag
             5.14%                                    [rand.c:26 -> rand.c:27]    0  ± 37.8% ▁▁▁████▁   libc-2.27.so       [.] rand
             5.14%                                    [rand.c:28 -> rand.c:28]    0                     libc-2.27.so       [.] rand
             2.15%                                  [rand@plt+0 -> rand@plt+0]    0                     div                [.] rand@plt
             0.00%                                                                                      [kernel.kallsyms]  [k] __x86_indirect_thunk_rax
             0.00%                                [do_mmap+714 -> do_mmap+732]  -10                     [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+737 -> do_mmap+765]    1                     [kernel.kallsyms]  [k] do_mmap
             0.00%                                [do_mmap+262 -> do_mmap+299]    0                     [kernel.kallsyms]  [k] do_mmap
             0.00%  [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0]    7                     [kernel.kallsyms]  [k] __x86_indirect_thunk_r15
             0.00%            [native_sched_clock+0 -> native_sched_clock+119]   -1  ± 38.5% ▄█▁        [kernel.kallsyms]  [k] native_sched_clock
             0.00%                 [native_write_msr+0 -> native_write_msr+16]  -13  ± 47.1% ▁█▇▃▁▁     [kernel.kallsyms]  [k] native_write_msr
      
       v8:
       ---
       Rebase to perf/core branch
      
       v7:
       ---
       1. v6 got Jiri's ACK.
       2. Rebase to latest perf/core branch.
      
       v6:
       ---
       1. Jiri provides better code for using data__hpp_register() in ui_init().
          Use this code in v6.
      
       v5:
       ---
       1. Refine the use of data__hpp_register() in ui_init() according to
          Jiri's suggestion.
      
       v4:
       ---
       1. Rename the new option from '--noisy' to '--cycles-hist'
       2. Remove the option '-n'.
       3. Only update the spark value and stats when '--cycles-hist' is enabled.
       4. Remove the code of printing '..'.
      
       v3:
       ---
       1. Move the histogram to a separate column
       2. Move the svals[] out of struct stats
      
       v2:
       ---
       Jiri got a compile error,
      
        CC       builtin-diff.o
        builtin-diff.c: In function ‘compute_cycles_diff’:
        builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value]
        712 |          labs(pair->block_info->cycles_spark[i] -
            |          ^~~~
      
       This is because the result of u64 - u64 is still u64, so we now change
       the type of cycles_spark[] to s64.
      Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
      Acked-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      cebf7d51
  2. 10 10月, 2019 21 次提交
  3. 09 10月, 2019 3 次提交
  4. 07 10月, 2019 2 次提交
  5. 01 10月, 2019 10 次提交
  6. 26 9月, 2019 3 次提交
    • A
      perf parser: Remove needless include directives · d6840d87
      Arnaldo Carvalho de Melo 提交于
      They go on accumulating there like the debug.h one, that was introduced
      here:
      
        f2361024 ("perf list: Add debug support for outputing alias string")
      
      But then, when that need is removed via:
      
        2073ad33 ("perf tools: Factor out PMU matching in parser")
      
      The thing stays there, so continue the house cleaning spree...
      
      list.h not needed, no macros from there are used, and 'struct
      list_head' is in linux/types.h, ditto for util.h, no need for that as
      well.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Link: https://lkml.kernel.org/n/tip-zkxr3mf6inun8m5mbnil4u0d@git.kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      d6840d87
    • T
      perf jvmti: Include JVMTI support for s390 · 61bf4ee2
      Thomas Richter 提交于
      Enable JVMTI support for s390 perf tool chain.
      Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Hendrik Brueckner <brueckner@linux.ibm.com>
      Cc: Vasily Gorbik <gor@linux.ibm.com>
      Link: http://lore.kernel.org/lkml/20190909114116.50469-3-tmricht@linux.ibm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      61bf4ee2
    • A
      perf evlist: Fix access of freed id arrays · 7834fa94
      Andi Kleen 提交于
      I'm not fully sure if this is the correct fix, but without this I get
      crashes on more complex perf stat metric usages. The problem is that
      part of the state gets freed when a weak group fails, but then is later
      still used. Just don't free the ids, we're going to reuse them anyways
      on the weak group retry.
      
      For example:
      
        % perf stat -M IpB,IpCall,IpTB,IPC,Retiring_SMT,Frontend_Bound_SMT,Kernel_Utilization,CPU_Utilization --metric-only -a -I 1000 sleep 2
      
        crashes and gives in valgrind:
      
        ==21527== Invalid write of size 8
        ==21527==    at 0x4EE582: hlist_add_head (list.h:644)
        ==21527==    by 0x4EFD3C: perf_evlist__id_hash (evlist.c:477)
        ==21527==    by 0x4EFD99: perf_evlist__id_add (evlist.c:483)
        ==21527==    by 0x4EFF15: perf_evlist__id_add_fd (evlist.c:524)
        ==21527==    by 0x4FC693: store_evsel_ids (evsel.c:2969)
        ==21527==    by 0x4FC76C: perf_evsel__store_ids (evsel.c:2986)
        ==21527==    by 0x450DA7: __run_perf_stat (builtin-stat.c:519)
        ==21527==    by 0x451285: run_perf_stat (builtin-stat.c:636)
        ==21527==    by 0x454619: cmd_stat (builtin-stat.c:1966)
        ==21527==    by 0x4D557D: run_builtin (perf.c:310)
        ==21527==    by 0x4D57EA: handle_internal_command (perf.c:362)
        ==21527==    by 0x4D5931: run_argv (perf.c:406)
        ==21527==  Address 0x12e3f008 is 104 bytes inside a block of size 2,056 free'd
        ==21527==    at 0x4839A0C: free (vg_replace_malloc.c:540)
        ==21527==    by 0x627139: xyarray__delete (xyarray.c:32)
        ==21527==    by 0x4F6BE4: perf_evsel__free_id (evsel.c:1253)
        ==21527==    by 0x4FA11F: evsel__close (evsel.c:1994)
        ==21527==    by 0x4F30A3: perf_evlist__reset_weak_group (evlist.c:1783)
        ==21527==    by 0x450B47: __run_perf_stat (builtin-stat.c:466)
        ==21527==    by 0x451285: run_perf_stat (builtin-stat.c:636)
        ==21527==    by 0x454619: cmd_stat (builtin-stat.c:1966)
        ==21527==    by 0x4D557D: run_builtin (perf.c:310)
        ==21527==    by 0x4D57EA: handle_internal_command (perf.c:362)
        ==21527==    by 0x4D5931: run_argv (perf.c:406)
        ==21527==    by 0x4D5CAE: main (perf.c:531)
        ==21527==  Block was alloc'd at
        ==21527==    at 0x483AB1A: calloc (vg_replace_malloc.c:762)
        ==21527==    by 0x627024: zalloc (zalloc.c:8)
        ==21527==    by 0x627088: xyarray__new (xyarray.c:10)
        ==21527==    by 0x4F6B20: perf_evsel__alloc_id (evsel.c:1237)
        ==21527==    by 0x4FC74E: perf_evsel__store_ids (evsel.c:2983)
        ==21527==    by 0x450DA7: __run_perf_stat (builtin-stat.c:519)
        ==21527==    by 0x451285: run_perf_stat (builtin-stat.c:636)
        ==21527==    by 0x454619: cmd_stat (builtin-stat.c:1966)
        ==21527==    by 0x4D557D: run_builtin (perf.c:310)
        ==21527==    by 0x4D57EA: handle_internal_command (perf.c:362)
        ==21527==    by 0x4D5931: run_argv (perf.c:406)
        ==21527==    by 0x4D5CAE: main (perf.c:531)
      Signed-off-by: Andi Kleen <ak@linux.intel.com>
      Acked-by: Jiri Olsa <jolsa@kernel.org>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Link: http://lore.kernel.org/lkml/20190923233339.25326-1-andi@firstfloor.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      7834fa94