1. 02 6月, 2020 2 次提交
    • A
      tools arch x86: Sync the msr-index.h copy with the kernel sources · 3b1f47d6
      Arnaldo Carvalho de Melo 提交于
      To pick up the changes in:
      
        5cde2653 ("perf/x86/rapl: Add AMD Fam17h RAPL support")
      
      Addressing this tools/perf build warning:
      
        Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'
        diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
      
      With this one will be able to use these new AMD MSRs in filters, by
      name, e.g.:
      
         # perf trace -e msr:* --filter="msr==AMD_PKG_ENERGY_STATUS || msr==AMD_RAPL_POWER_UNIT"
      
      Just like it is now possible with other MSRs:
      
        [root@five ~]# uname -a
        Linux five 5.5.17-200.fc31.x86_64 #1 SMP Mon Apr 13 15:29:42 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
        [root@five ~]# grep 'model name' -m1 /proc/cpuinfo
        model name	: AMD Ryzen 5 3600X 6-Core Processor
        [root@five ~]#
        [root@five ~]# perf trace -e msr:*/max-stack=16/ --filter="msr==AMD_PERF_CTL" --max-events=2
             0.000 kworker/1:1-ev/2327824 msr:write_msr(msr: AMD_PERF_CTL, val: 2)
                                               do_trace_write_msr ([kernel.kallsyms])
                                               do_trace_write_msr ([kernel.kallsyms])
                                               [0xffffffffc01d71c3] ([acpi_cpufreq])
                                               [0] ([unknown])
                                               __cpufreq_driver_target ([kernel.kallsyms])
                                               od_dbs_update ([kernel.kallsyms])
                                               dbs_work_handler ([kernel.kallsyms])
                                               process_one_work ([kernel.kallsyms])
                                               worker_thread ([kernel.kallsyms])
                                               kthread ([kernel.kallsyms])
                                               ret_from_fork ([kernel.kallsyms])
             8.597 kworker/2:2-ev/2338099 msr:write_msr(msr: AMD_PERF_CTL, val: 2)
                                               do_trace_write_msr ([kernel.kallsyms])
                                               do_trace_write_msr ([kernel.kallsyms])
                                               [0] ([unknown])
                                               [0] ([unknown])
                                               __cpufreq_driver_target ([kernel.kallsyms])
                                               od_dbs_update ([kernel.kallsyms])
                                               dbs_work_handler ([kernel.kallsyms])
                                               process_one_work ([kernel.kallsyms])
                                               worker_thread ([kernel.kallsyms])
                                               kthread ([kernel.kallsyms])
                                               ret_from_fork ([kernel.kallsyms])
        [root@five ~]#
      
      Longer explanation with what happens in the perf build process,
      automatically after this is made in synch with the kernel sources:
      
        $ make -C tools/perf O=/tmp/build/perf install-bin
        <SNIP>
        Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'
        diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
        <SNIP>
        make: Leaving directory '/home/acme/git/perf/tools/perf'
        $
        $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before
        $
        $ diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
        --- tools/arch/x86/include/asm/msr-index.h	2020-06-02 10:46:36.217782288 -0300
        +++ arch/x86/include/asm/msr-index.h	2020-05-28 10:41:23.313794627 -0300
        @@ -301,6 +301,9 @@
         #define MSR_PP1_ENERGY_STATUS		0x00000641
         #define MSR_PP1_POLICY			0x00000642
      
        +#define MSR_AMD_PKG_ENERGY_STATUS	0xc001029b
        +#define MSR_AMD_RAPL_POWER_UNIT		0xc0010299
        +
         /* Config TDP MSRs */
         #define MSR_CONFIG_TDP_NOMINAL		0x00000648
         #define MSR_CONFIG_TDP_LEVEL_1		0x00000649
        $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
        $
        $ make -C tools/perf O=/tmp/build/perf install-bin
        <SNIP>
          CC       /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o
          LD       /tmp/build/perf/trace/beauty/tracepoints/perf-in.o
          LD       /tmp/build/perf/trace/beauty/perf-in.o
          LD       /tmp/build/perf/perf-in.o
          LINK     /tmp/build/perf/perf
        <SNIP>
        make: Leaving directory '/home/acme/git/perf/tools/perf'
        $
        $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after
        $ diff -u before after
        --- before	2020-06-02 10:47:08.486334348 -0300
        +++ after	2020-06-02 10:47:33.075008948 -0300
        @@ -286,6 +286,8 @@
         	[0xc0010240 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTL",
         	[0xc0010241 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTR",
         	[0xc0010280 - x86_AMD_V_KVM_MSRs_offset] = "F15H_PTSC",
        +	[0xc0010299 - x86_AMD_V_KVM_MSRs_offset] = "AMD_RAPL_POWER_UNIT",
        +	[0xc001029b - x86_AMD_V_KVM_MSRs_offset] = "AMD_PKG_ENERGY_STATUS",
         	[0xc00102f0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN_CTL",
         	[0xc00102f1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN",
         };
        $
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: David Ahern <dsahern@gmail.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Wang Nan <wangnan0@huawei.com>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      3b1f47d6
    • J
      perf stat: Ensure group is defined on top of the same cpu mask · a9a17902
      Jiri Olsa 提交于
      Jin Yao reported the issue (and posted first versions of this change)
      with groups being defined over events with different cpu mask.
      
      This causes assert aborts in get_group_fd, like:
      
        # perf stat -M "C2_Pkg_Residency" -a -- sleep 1
        perf: util/evsel.c:1464: get_group_fd: Assertion `!(fd == -1)' failed.
        Aborted
      
      All the events in the group have to be defined over the same cpus so the
      group_fd can be found for every leader/member pair.
      
      Adding check to ensure this condition is met and removing the group
      (with warning) if we detect mixed cpus, like:
      
        $ sudo perf stat -e '{power/energy-cores/,cycles},{instructions,power/energy-cores/}'
        WARNING: event cpu maps do not match, disabling group:
          anon group { power/energy-cores/, cycles }
          anon group { instructions, power/energy-cores/ }
      
      Ian asked also for cpu maps details, it's displayed in verbose mode:
      
        $ sudo perf stat -e '{cycles,power/energy-cores/}' -v
        WARNING: group events cpu maps do not match, disabling group:
          anon group { power/energy-cores/, cycles }
             power/energy-cores/: 0
             cycles: 0-7
          anon group { instructions, power/energy-cores/ }
             instructions: 0-7
             power/energy-cores/: 0
      
      Committer testing:
      
        [root@seventh ~]# perf stat -e '{power/energy-cores/,cycles},{instructions,power/energy-cores/}'
        WARNING: grouped events cpus do not match, disabling group:
          anon group { power/energy-cores/, cycles }
          anon group { instructions, power/energy-cores/ }
        ^C
         Performance counter stats for 'system wide':
      
                     12.62 Joules power/energy-cores/
               106,920,637        cycles
                80,228,899        instructions              #    0.75  insn per cycle
                     12.62 Joules power/energy-cores/
      
              14.514476987 seconds time elapsed
      
        [root@seventh ~]#
      
      But if we put compatible events in each group it works:
      
        [root@seventh ~]# perf stat -e '{power/energy-cores/,power/energy-ram/},{instructions,cycles}' -a sleep 2
      
         Performance counter stats for 'system wide':
      
                      1.95 Joules power/energy-cores/
                      0.92 Joules power/energy-ram/
                29,305,715        instructions              #    1.03  insn per cycle
                28,423,338        cycles
      
               2.001438142 seconds time elapsed
      
        [root@seventh ~]#
      
      This needs improvement tho:
      
        [root@seventh ~]# perf stat -e '{power/energy-cores/,power/energy-ram/},{instructions,cycles}' sleep 2
        Error:
        The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (power/energy-cores/).
        /bin/dmesg | grep -i perf may provide additional information.
      
        [root@seventh ~]#
      
      We need to emit a better message, one stating that the power/ events
      can't be used for a specific workload, instead it is per-cpu or system
      wide.
      
      Fixes: 6a4bb04c ("perf tools: Enable grouping logic for parsed events")
      Co-developed-by: Jin Yao <yao.jin@linux.intel.com>
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Acked-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Michael Petlan <mpetlan@redhat.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200602101736.GE1112120@krava
      Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      a9a17902
  2. 01 6月, 2020 7 次提交
    • I
      perf libdw: Fix off-by 1 relative directory includes · 5cf0e8eb
      Ian Rogers 提交于
      This is currently working due to extra include paths in the build.
      
      Before:
      
        $ cd tools/perf/arch/arm64/util
        $ ls -la ../../util/unwind-libdw.h
        ls: cannot access '../../util/unwind-libdw.h': No such file or directory
      
      After:
      
        $ ls -la ../../../util/unwind-libdw.h
        -rw-r----- 1 irogers irogers 553 Apr 17 14:31 ../../../util/unwind-libdw.h
      Signed-off-by: Ian Rogers <irogers@google.com>
      Acked-by: Namhyung Kim <namhyung@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200529225232.207532-1-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      5cf0e8eb
    • T
      perf arm-spe: Support synthetic events · a54ca194
      Tan Xiaojun 提交于
      After the commit ffd3d18c ("perf tools: Add ARM Statistical
      Profiling Extensions (SPE) support") was merged, perf can output raw
      data with the "--dump-raw-trace" option.  However, it lacks support
      for synthetic events, so it cannot output any statistical info.
      
      This patch is to improve the "perf report" support for ARM SPE for four
      types of synthetic events:
      
        First level cache synthetic events, including L1 data cache accessing
        and missing events;
        Last level cache synthetic events, including last level cache
        accessing and missing events;
        TLB synthetic events, including TLB accessing and missing events;
        Remote access events, which is used to account load/store operations
        caused to another socket.
      
      Example usage:
      
        $ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
        $ perf report --stdio
      
        # Samples: 59  of event 'l1d-miss'
        # Event count (approx.): 59
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  ..................................
        #
            23.73%    23.73%  dd       [kernel.kallsyms]  [k] perf_iterate_ctx.constprop.135
            20.34%    20.34%  dd       [kernel.kallsyms]  [k] filemap_map_pages
             5.08%     5.08%  dd       [kernel.kallsyms]  [k] perf_event_mmap
             5.08%     5.08%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
             5.08%     5.08%  dd       [kernel.kallsyms]  [k] unmap_page_range
             3.39%     3.39%  dd       [kernel.kallsyms]  [k] PageHuge
             3.39%     3.39%  dd       [kernel.kallsyms]  [k] release_pages
             3.39%     3.39%  dd       ld-2.28.so         [.] 0x0000000000008b5c
             1.69%     1.69%  dd       [kernel.kallsyms]  [k] __alloc_fd
             [...]
      
        # Samples: 3K of event 'l1d-access'
        # Event count (approx.): 3980
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  ......................................
        #
            26.98%    26.98%  dd       [kernel.kallsyms]  [k] ret_to_user
            10.53%    10.53%  dd       [kernel.kallsyms]  [k] fsnotify
             7.51%     7.51%  dd       [kernel.kallsyms]  [k] new_sync_read
             4.57%     4.57%  dd       [kernel.kallsyms]  [k] vfs_read
             4.35%     4.35%  dd       [kernel.kallsyms]  [k] vfs_write
             3.69%     3.69%  dd       [kernel.kallsyms]  [k] __fget_light
             3.69%     3.69%  dd       [kernel.kallsyms]  [k] rw_verify_area
             3.44%     3.44%  dd       [kernel.kallsyms]  [k] security_file_permission
             2.76%     2.76%  dd       [kernel.kallsyms]  [k] __fsnotify_parent
             2.44%     2.44%  dd       [kernel.kallsyms]  [k] ksys_write
             2.24%     2.24%  dd       [kernel.kallsyms]  [k] iov_iter_zero
             2.19%     2.19%  dd       [kernel.kallsyms]  [k] read_iter_zero
             1.81%     1.81%  dd       dd                 [.] 0x0000000000002960
             1.78%     1.78%  dd       dd                 [.] 0x0000000000002980
             [...]
      
        # Samples: 35  of event 'llc-miss'
        # Event count (approx.): 35
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  ...........................
        #
            34.29%    34.29%  dd       [kernel.kallsyms]  [k] filemap_map_pages
             8.57%     8.57%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
             8.57%     8.57%  dd       [kernel.kallsyms]  [k] unmap_page_range
             5.71%     5.71%  dd       [kernel.kallsyms]  [k] PageHuge
             5.71%     5.71%  dd       [kernel.kallsyms]  [k] release_pages
             5.71%     5.71%  dd       ld-2.28.so         [.] 0x0000000000008b5c
             2.86%     2.86%  dd       [kernel.kallsyms]  [k] __queue_work
             2.86%     2.86%  dd       [kernel.kallsyms]  [k] __radix_tree_lookup
             2.86%     2.86%  dd       [kernel.kallsyms]  [k] copy_page
             [...]
      
        # Samples: 2  of event 'llc-access'
        # Event count (approx.): 2
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  .............
        #
            50.00%    50.00%  dd       [kernel.kallsyms]  [k] copy_page
            50.00%    50.00%  dd       libc-2.28.so       [.] _dl_addr
      
        # Samples: 48  of event 'tlb-miss'
        # Event count (approx.): 48
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  ..................................
        #
            20.83%    20.83%  dd       [kernel.kallsyms]  [k] perf_iterate_ctx.constprop.135
            12.50%    12.50%  dd       [kernel.kallsyms]  [k] __arch_clear_user
            10.42%    10.42%  dd       [kernel.kallsyms]  [k] clear_page
             4.17%     4.17%  dd       [kernel.kallsyms]  [k] copy_page
             4.17%     4.17%  dd       [kernel.kallsyms]  [k] filemap_map_pages
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] __alloc_fd
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] __mod_memcg_state.part.70
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] __queue_work
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] __rcu_read_unlock
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] d_path
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] destroy_inode
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] do_dentry_open
             [...]
      
        # Samples: 9K of event 'tlb-access'
        # Event count (approx.): 9573
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  ......................................
        #
            25.79%    25.79%  dd       [kernel.kallsyms]  [k] __arch_clear_user
            11.22%    11.22%  dd       [kernel.kallsyms]  [k] ret_to_user
             8.56%     8.56%  dd       [kernel.kallsyms]  [k] fsnotify
             4.06%     4.06%  dd       [kernel.kallsyms]  [k] new_sync_read
             3.67%     3.67%  dd       [kernel.kallsyms]  [k] el0_svc_common.constprop.2
             3.04%     3.04%  dd       [kernel.kallsyms]  [k] __fsnotify_parent
             2.90%     2.90%  dd       [kernel.kallsyms]  [k] vfs_write
             2.82%     2.82%  dd       [kernel.kallsyms]  [k] vfs_read
             2.52%     2.52%  dd       libc-2.28.so       [.] write
             2.26%     2.26%  dd       [kernel.kallsyms]  [k] security_file_permission
             2.08%     2.08%  dd       [kernel.kallsyms]  [k] ksys_write
             1.96%     1.96%  dd       [kernel.kallsyms]  [k] rw_verify_area
             1.95%     1.95%  dd       [kernel.kallsyms]  [k] read_iter_zero
             [...]
      
        # Samples: 9  of event 'branch-miss'
        # Event count (approx.): 9
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  .........................
        #
            22.22%    22.22%  dd       libc-2.28.so       [.] _dl_addr
            11.11%    11.11%  dd       [kernel.kallsyms]  [k] __arch_clear_user
            11.11%    11.11%  dd       [kernel.kallsyms]  [k] __arch_copy_from_user
            11.11%    11.11%  dd       [kernel.kallsyms]  [k] __dentry_kill
            11.11%    11.11%  dd       [kernel.kallsyms]  [k] __efistub_memcpy
            11.11%    11.11%  dd       ld-2.28.so         [.] 0x0000000000012b7c
            11.11%    11.11%  dd       libc-2.28.so       [.] 0x000000000002a980
            11.11%    11.11%  dd       libc-2.28.so       [.] 0x0000000000083340
      
        # Samples: 29  of event 'remote-access'
        # Event count (approx.): 29
        #
        # Children      Self  Command  Shared Object      Symbol
        # ........  ........  .......  .................  ...........................
        #
            41.38%    41.38%  dd       [kernel.kallsyms]  [k] filemap_map_pages
            10.34%    10.34%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
            10.34%    10.34%  dd       [kernel.kallsyms]  [k] unmap_page_range
             6.90%     6.90%  dd       [kernel.kallsyms]  [k] release_pages
             3.45%     3.45%  dd       [kernel.kallsyms]  [k] PageHuge
             3.45%     3.45%  dd       [kernel.kallsyms]  [k] __queue_work
             3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_add_file_rmap
             3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_counter_try_charge
             3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_remove_rmap
             3.45%     3.45%  dd       [kernel.kallsyms]  [k] xas_start
             3.45%     3.45%  dd       ld-2.28.so         [.] 0x0000000000002a1c
             3.45%     3.45%  dd       ld-2.28.so         [.] 0x0000000000008b5c
             3.45%     3.45%  dd       ld-2.28.so         [.] 0x00000000000093cc
      Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
      Tested-by: James Clark <james.clark@arm.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Al Grant <al.grant@arm.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Cc: Mike Leach <mike.leach@linaro.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Will Deacon <will@kernel.org>
      Cc: linux-arm-kernel@lists.infradead.org
      Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
      Signed-off-by: James Clark <james.clark@arm.com>
      Signed-off-by: Leo Yan <leo.yan@linaro.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      a54ca194
    • T
      perf auxtrace: Add four itrace options · 9f74d770
      Tan Xiaojun 提交于
      This patch is to add four options to synthesize events which are
      described as below:
      
       'f': synthesize first level cache events
       'm': synthesize last level cache events
       't': synthesize TLB events
       'a': synthesize remote access events
      
      These four options will be used by ARM SPE as their first consumer.
      Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
      Tested-by: James Clark <james.clark@arm.com>
      Acked-by: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Al Grant <al.grant@arm.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Cc: Mike Leach <mike.leach@linaro.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Will Deacon <will@kernel.org>
      Cc: linux-arm-kernel@lists.infradead.org
      Link: http://lore.kernel.org/lkml/20200530122442.490-3-leo.yan@linaro.org
      Signed-off-by: James Clark <james.clark@arm.com>
      Signed-off-by: Leo Yan <leo.yan@linaro.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      9f74d770
    • T
      perf tools: Move arm-spe-pkt-decoder.h/c to the new dir · 4db25f66
      Tan Xiaojun 提交于
      Create a new arm-spe-decoder directory for subsequent extensions and
      move arm-spe-pkt-decoder.h/c to this directory. No code changes.
      Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
      Tested-by: James Clark <james.clark@arm.com>
      Tested-by: Qi Liu <liuqi115@hisilicon.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Al Grant <al.grant@arm.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Cc: Mike Leach <mike.leach@linaro.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Will Deacon <will@kernel.org>
      Cc: linux-arm-kernel@lists.infradead.org
      Link: http://lore.kernel.org/lkml/20200530122442.490-2-leo.yan@linaro.org
      Signed-off-by: James Clark <james.clark@arm.com>
      Signed-off-by: Leo Yan <leo.yan@linaro.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      4db25f66
    • I
      perf test: Initialize memory in dwarf-unwind · 0fb0d615
      Ian Rogers 提交于
      Avoid a false positive caused by assembly code in arch/x86.
      
      In tests, zero the perf_event to avoid uninitialized memory uses.
      
      Warnings were caught using clang with -fsanitize=memory.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Alexei Starovoitov <ast@kernel.org>
      Cc: Jakub Kicinski <kuba@kernel.org>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Quentin Monnet <quentin@isovalent.com>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: clang-built-linux@googlegroups.com
      Link: http://lore.kernel.org/lkml/20200530082015.39162-4-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      0fb0d615
    • I
      perf tests: Don't tail call optimize in unwind test · 8617e2e3
      Ian Rogers 提交于
      The tail call optimization can unexpectedly make the stack smaller and
      cause the test to fail.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Alexei Starovoitov <ast@kernel.org>
      Cc: clang-built-linux@googlegroups.com
      Cc: Jakub Kicinski <kuba@kernel.org>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Quentin Monnet <quentin@isovalent.com>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200530082015.39162-3-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      8617e2e3
    • I
      tools compiler.h: Add attribute to disable tail calls · 21f2b7c1
      Ian Rogers 提交于
      Tail call optimizations can remove stack frames that are used in
      unwinding tests. Add an attribute that can be used to disable the tail
      call optimization. Tested  on clang and GCC.
      
      Committer notes:
      
      Old versions of clang don't like that __attribute__((optimize)), so add
      an ifdef to make it go away.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Alexei Starovoitov <ast@kernel.org>
      Cc: clang-built-linux@googlegroups.com
      Cc: Jakub Kicinski <kuba@kernel.org>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Quentin Monnet <quentin@isovalent.com>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200530082015.39162-2-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      21f2b7c1
  3. 30 5月, 2020 14 次提交
    • A
      perf build: Add a LIBPFM4=1 build test entry · 9300acc6
      Arnaldo Carvalho de Melo 提交于
      So that when one runs:
      
        $ make -C tools/perf build-test
      
      We make sure that recent changes don't break that opt-in build.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Alexei Starovoitov <ast@kernel.org>
      Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Andrii Nakryiko <andriin@fb.com>
      Cc: Daniel Borkmann <daniel@iogearbox.net>
      Cc: Florian Fainelli <f.fainelli@gmail.com>
      Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Igor Lubashev <ilubashe@akamai.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Jiwei Sun <jiwei.sun@windriver.com>
      Cc: John Garry <john.garry@huawei.com>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Martin KaFai Lau <kafai@fb.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Yonghong Song <yhs@fb.com>
      Cc: yuzhoujian <yuzhoujian@didichuxing.com>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      9300acc6
    • S
      perf tools: Add optional support for libpfm4 · 70943490
      Stephane Eranian 提交于
      This patch links perf with the libpfm4 library if it is available and
      LIBPFM4 is passed to the build. The libpfm4 library contains hardware
      event tables for all processors supported by perf_events. It is a helper
      library that helps convert from a symbolic event name to the event
      encoding required by the underlying kernel interface. This library is
      open-source and available from: http://perfmon2.sf.net.
      
      With this patch, it is possible to specify full hardware events by name.
      Hardware filters are also supported. Events must be specified via the
      --pfm-events and not -e option. Both options are active at the same time
      and it is possible to mix and match:
      
        $ perf stat --pfm-events inst_retired:any_p:c=1:i -e cycles ....
      
      One needs to explicitly ask for its inclusion by using the LIBPFM4 make
      command line option, i.e. it's opt-in rather than opt-out of feature
      detection and build support.
      Signed-off-by: Stephane Eranian <eranian@google.com>
      Reviewed-by: Ian Rogers <irogers@google.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Alexei Starovoitov <ast@kernel.org>
      Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Andrii Nakryiko <andriin@fb.com>
      Cc: Daniel Borkmann <daniel@iogearbox.net>
      Cc: Florian Fainelli <f.fainelli@gmail.com>
      Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      Cc: Igor Lubashev <ilubashe@akamai.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Jiwei Sun <jiwei.sun@windriver.com>
      Cc: John Garry <john.garry@huawei.com>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Martin KaFai Lau <kafai@fb.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Yonghong Song <yhs@fb.com>
      Cc: bpf@vger.kernel.org
      Cc: netdev@vger.kernel.org
      Cc: yuzhoujian <yuzhoujian@didichuxing.com>
      Link: http://lore.kernel.org/lkml/20200505182943.218248-2-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      70943490
    • E
      perf tools: Correct license on jsmn JSON parser · 82352ae2
      Ed Maste 提交于
      This header is part of the jsmn JSON parser, introduced in 867a979a.
      Correct the SPDX tag to indicate that it is under the MIT license.
      Signed-off-by: Ed Maste <emaste@freebsd.org>
      Acked-by: Andi Kleen <ak@linux.intel.com>
      Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      Link: http://lore.kernel.org/lkml/20200528170858.48457-1-emaste@freefall.freebsd.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      82352ae2
    • N
      perf jit: Fix inaccurate DWARF line table · 1e4bd2ae
      Nick Gasson 提交于
      Fix an issue where addresses in the DWARF line table are offset by -0x40
      (GEN_ELF_TEXT_OFFSET). This can be seen with `objdump -S` on the ELF
      files after perf inject.
      
      Committer notes:
      
      Ian added this in his Acked-by reply:
      
       ---
      Without too much knowledge this looks good to me. The original code came
      from oprofile's jit support:
      
        https://sourceforge.net/p/oprofile/oprofile/ci/master/tree/opjitconv/debug_line.c#l325
       ---
      Signed-off-by: Nick Gasson <nick.gasson@arm.com>
      Acked-by: Ian Rogers <irogers@google.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200528051916.6722-1-nick.gasson@arm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      1e4bd2ae
    • N
      perf jvmti: Remove redundant jitdump line table entries · 7d7e503c
      Nick Gasson 提交于
      For each PC/BCI pair in the JVMTI compiler inlining record table, the
      jitdump plugin emits debug line table entries for every source line in
      the method preceding that BCI. Instead only emit one source line per
      PC/BCI pair. Reported by Ian Rogers. This reduces the .dump size for
      SPECjbb from ~230MB to ~40MB.
      Signed-off-by: Nick Gasson <nick.gasson@arm.com>
      Acked-by: Ian Rogers <irogers@google.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200528054049.13662-1-nick.gasson@arm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      7d7e503c
    • A
      perf build: Add NO_SDT=1 to the default set of build tests · 60da3a12
      Arnaldo Carvalho de Melo 提交于
      We forgot to add it, so one would have to explicitly ask for it to be
      run, fix that by adding it to the set of tests that are performed by
      default when one does:
      
        $ make -C tools/perf build-test
      
      It was being exercised only in the make_minimal test, this patch makes
      it be tested in isolation, i.e. disabling only this feature.
      
      Fixes: e26e63be ("perf build: Add sdt feature detection")
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Masami Hiramatsu <mhiramat@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      60da3a12
    • A
      perf build: Add NO_LIBCRYPTO=1 to the default set of build tests · 69fbadbe
      Arnaldo Carvalho de Melo 提交于
      We forgot to add it, so one would have to explicitly ask for it to be
      run, fix that by adding it to the set of tests that are performed by
      default when one does:
      
        $ make -C tools/perf build-test
      
      It was being exercised only in the make_minimal test, this patch makes
      it be tested in isolation, i.e. disabling only this feature.
      
      Fixes: 8ee46460 ("perf build: Add libcrypto feature detection")
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      69fbadbe
    • A
      perf build: Add NO_SYSCALL_TABLE=1 to the build tests · 5bc7aac3
      Arnaldo Carvalho de Melo 提交于
      So that we make sure that even on x86-64 and other architectures where
      that is the default method we test build the fallback to libaudit that
      other architectures use.
      
      I.e. now this line got added to:
      
        $ make -C tools/perf build-test
        <SNIP>
             make_no_syscall_tbl_O: cd . && make NO_SYSCALL_TABLE=1 FEATURES_DUMP=/home/acme/git/perf/tools/perf/BUILD_TEST_FEATURE_DUMP -j12 O=/tmp/tmp.W0HtKR1mfr DESTDIR=/tmp/tmp.lNezgCVPzW
        <SNIP>
        $
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      5bc7aac3
    • A
      perf build: Remove libaudit from the default feature checks · a88f70de
      Arnaldo Carvalho de Melo 提交于
      Ingo reported that the libaudit was always appearing as OFF:
      
        Auto-detecting system features:
        ...                         dwarf: [ on  ]
        ...            dwarf_getlocations: [ on  ]
        ...                         glibc: [ on  ]
        ...                          gtk2: [ on  ]
        ...                      libaudit: [ OFF ]
      
      And everything seemed to work, i.e. we were checking for a feature that
      we don't use, causing confusion for people building perf, so work to
      remove that nuisance while making sure that it works when an arch
      doesn't provide the alternative method to generate the syscall id/name
      conversion tables.
      
      Longer explanation of the new modus operandi:
      
        $ make -C tools/perf O=/tmp/build/perf NO_SYSCALL_TABLE=1
        <SNIP>
        Auto-detecting system features:
        ...                         dwarf: [ on  ]
        ...            dwarf_getlocations: [ on  ]
        ...                         glibc: [ on  ]
        ...                          gtk2: [ on  ]
        ...                        libbfd: [ on  ]
        ...                        libcap: [ on  ]
        ...                        libelf: [ on  ]
        ...                       libnuma: [ on  ]
        ...        numa_num_possible_cpus: [ on  ]
        ...                       libperl: [ on  ]
        ...                     libpython: [ on  ]
        ...                     libcrypto: [ on  ]
        ...                     libunwind: [ on  ]
        ...            libdw-dwarf-unwind: [ on  ]
        ...                          zlib: [ on  ]
        ...                          lzma: [ on  ]
        ...                     get_cpuid: [ on  ]
        ...                           bpf: [ on  ]
        ...                        libaio: [ on  ]
        ...                       libzstd: [ on  ]
        ...        disassembler-four-args: [ on  ]
      
        Makefile.config:665: No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev
          GEN      /tmp/build/perf/common-cmds.h
          MKDIR    /tmp/build/perf/fd/
          MKDIR    /tmp/build/perf/fs/
        <SNIP>
        $
      
      The libaudit test is forced and it fails when audit-libs-devel isn't available:
      
        $ cat /tmp/build/perf/feature/test-libaudit.make.output
        test-libaudit.c:2:10: fatal error: libaudit.h: No such file or directory
            2 | #include <libaudit.h>
              |          ^~~~~~~~~~~~
        compilation terminated.
        $
      
      If we install audit-libs-devel and rebuild it continues not to be shown as OFF
      in the main auto-detection summary, but again gets tested and this time:
      
        $ rpm -q audit-libs-devel
        audit-libs-devel-3.0-0.15.20191104git1c2f876.fc31.x86_64
        $
      
      The make output for the feature detection comes clean:
      
        $ cat /tmp/build/perf/feature/test-libaudit.make.output
      
      And the feature detection binary is successfully built and is dynamically linked
      with libaudit:
      
        $ ldd /tmp/build/perf/feature/test-libaudit.bin | grep audit
        	libaudit.so.1 => /lib64/libaudit.so.1 (0x00007f5bf5177000)
        $
      
      As well as the resulting perf binary:
      
        $ ldd /tmp/build/perf/perf | grep audit
        	libaudit.so.1 => /lib64/libaudit.so.1 (0x00007fad511c7000)
        $
      
      And 'perf trace' works using the libaudit method:
      
        $ sudo /tmp/build/perf/perf trace -e nanosleep sleep 1
             0.000 (1000.067 ms): sleep/281872 nanosleep(rqtp: 0x7ffedbbe69d0) = 0
        $
      
      If we leave audit-libs-devel installed but don't disable the use of the best
      method, the one using SYSCALL_TABLE, the default for architectures that provide
      the script to build the syscall id/name mapping using the .tbl files copied
      from the kernel sources, we get:
      
        $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf
        $ make -C tools/perf O=/tmp/build/perf
        Auto-detecting system features:
        ...                         dwarf: [ on  ]
        ...            dwarf_getlocations: [ on  ]
        ...                         glibc: [ on  ]
        ...                          gtk2: [ on  ]
        ...                        libbfd: [ on  ]
        ...                        libcap: [ on  ]
        ...                        libelf: [ on  ]
        ...                       libnuma: [ on  ]
        ...        numa_num_possible_cpus: [ on  ]
        ...                       libperl: [ on  ]
        ...                     libpython: [ on  ]
        ...                     libcrypto: [ on  ]
        ...                     libunwind: [ on  ]
        ...            libdw-dwarf-unwind: [ on  ]
        ...                          zlib: [ on  ]
        ...                          lzma: [ on  ]
        ...                     get_cpuid: [ on  ]
        ...                           bpf: [ on  ]
        ...                        libaio: [ on  ]
        ...                       libzstd: [ on  ]
        ...        disassembler-four-args: [ on  ]
      
          GEN      /tmp/build/perf/common-cmds.h
        <SNIP>
        $
      
      Again, no mention of libaudit being on or OFF and:
      
        $ cat /tmp/build/perf/feature/test-libaudit.make.output
        cat: /tmp/build/perf/feature/test-libaudit.make.output: No such file or directory
        $
      
      We didn't even bother checking for its availability, slightly speeding up the
      build process and:
      
        $ ldd /tmp/build/perf/perf | grep libaudit
        $
      
      We don't link with it, also:
      
        $ sudo /tmp/build/perf/perf trace -e nanosleep sleep 1
             0.000 (1000.053 ms): sleep/299125 nanosleep(rqtp: 0x7ffc24611b50) = 0
        $
      
      And globs become available:
      
        $ sudo /tmp/build/perf/perf trace -e *sleep sleep 1
             0.000 (1000.072 ms): sleep/299136 nanosleep(rqtp: 0x7ffe7a3c4ff0) = 0
        $
      Reported-by: Ingo Molnar <mingo@kernel.org>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      a88f70de
    • A
      perf trace: Grow the syscall table as needed when using libaudit · d21cb73a
      Arnaldo Carvalho de Melo 提交于
      The audit-libs API doesn't provide a way to figure out what is the
      syscall with the greatest number/id, take that into account when using
      that method to go on growing the syscall table as the syscalls go on
      appearing on the radar.
      
      With this the libaudit based method is back working, i.e. when building
      with:
      
        $ make NO_SYSCALL_TABLE=1 O=/tmp/build/perf -C tools/perf install-bin
        <SNIP>
        Auto-detecting system features:
        <SNIP>
        ...                      libaudit: [ on  ]
        ...                        libbfd: [ on  ]
        ...                        libcap: [ on  ]
        <SNIP>
        $ ldd ~/bin/perf | grep audit
      	libaudit.so.1 => /lib64/libaudit.so.1 (0x00007faef22df000)
        $
      
      perf trace is back working, which makes it functional in arches other
      than x86_64, powerpc, arm64 and s390, which provide these generators:
      
        $ find tools/perf/arch/ -name "*syscalltbl*"
        tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
        tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
        tools/perf/arch/s390/entry/syscalls/mksyscalltbl
        tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
        $
      
      Example output forcing the libaudit method on x86_64:
      
        # perf trace -e file,nanosleep sleep 0.001
                 ? (         ): sleep/859090  ... [continued]: execve())                                   = 0
             0.045 ( 0.005 ms): sleep/859090 access(filename: 0x8733e850, mode: R)                         = -1 ENOENT (No such file or directory)
             0.055 ( 0.005 ms): sleep/859090 openat(dfd: CWD, filename: 0x8733ba29, flags: RDONLY|CLOEXEC) = 3
             0.079 ( 0.005 ms): sleep/859090 openat(dfd: CWD, filename: 0x87345d20, flags: RDONLY|CLOEXEC) = 3
             0.085 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483f58, count: 832)                  = 832
             0.090 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483b50, count: 784)                  = 784
             0.094 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483b20, count: 32)                   = 32
             0.098 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483ad0, count: 68)                   = 68
             0.109 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483a50, count: 784)                  = 784
             0.113 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483730, count: 32)                   = 32
             0.117 ( 0.002 ms): sleep/859090 read(fd: 3, buf: 0x7ffd9d483710, count: 68)                   = 68
             0.320 ( 0.008 ms): sleep/859090 openat(dfd: CWD, filename: 0x872c3660, flags: RDONLY|CLOEXEC) = 3
             0.372 ( 1.057 ms): sleep/859090 nanosleep(rqtp: 0x7ffd9d484ac0)                               = 0
        #
      
      There are still some limitations when using the libaudit method, that
      will be fixed at some point, i.e., this works with the mksyscalltbl
      method but not with libaudit's:
      
        # perf trace -e file,*sleep sleep 0.001
        event syntax error: '*sleep'
                             \___ parser error
        Run 'perf list' for a list of valid events
      
         Usage: perf trace [<options>] [<command>]
            or: perf trace [<options>] -- <command> [<options>]
            or: perf trace record [<options>] [<command>]
            or: perf trace record [<options>] -- <command> [<options>]
      
            -e, --event <event>   event/syscall selector. use 'perf list' to list available events
        #
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      d21cb73a
    • A
      perf trace: Use zalloc() to make sure all fields are zeroed in the syscalltbl constructor · a9e8c1f8
      Arnaldo Carvalho de Melo 提交于
      In the past this wasn't needed as the libaudit based code would use just
      one field, and the alternative constructor would fill in all the fields,
      but now that even when using the libaudit based method we need the other
      fields, switch to zalloc() to make sure the other fields are zeroed at
      instantiation time.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      a9e8c1f8
    • A
      perf trace: Remove union from syscalltbl, all the fields are needed · db6b8cc8
      Arnaldo Carvalho de Melo 提交于
      When we moved to a syscalltbl generated from the kernel syscall tables
      (arch/..../syscall*.tbl) the idea was to either use it, when having the
      generator (e.g. tools/perf/arch/x86/entry/syscalls/syscalltbl.sh), or
      falling back to the previous audit-libs based way of mapping syscall ids
      to strings and the other way around.
      
      At first we just needed the audit_detect_machine() return to then use it
      to the str->id/id->str, or the other fields for the now used by default
      in the most well developed arches method of using the syscall table
      generator.
      
      The problem is that then the libaudit code fell into disrepair, and
      architectures where it is the method used are not working.
      
      Now, with NO_SYSCALL_TABLE=1 being possible to pass on the make command
      line we can automate the testing of that method even on x86-64, arm64,
      etc.
      
      And doing it I noted that we actually use fields in both entries in the
      union, oops, so ditch the union, as we need all those fields at the same
      time.
      
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      db6b8cc8
    • A
      perf build: Allow explicitly disabling the NO_SYSCALL_TABLE variable · 43de3869
      Arnaldo Carvalho de Melo 提交于
      This is useful to see if, on x86, the legacy libaudit still works, as it
      is used in architectures that don't have the SYSCALL_TABLE logic and we
      want to have it tested in 'make -C tools/perf/ build-test'.
      
      E.g.:
      
      Without having audit-libs-devel installed:
      
        $ make NO_SYSCALL_TABLE=1 O=/tmp/build/perf -C tools/perf install-bin
        make: Entering directory '/home/acme/git/perf/tools/perf'
          BUILD:   Doing 'make -j12' parallel build
        <SNIP>
        Auto-detecting system features:
        <SNIP>
        ...                      libaudit: [ OFF ]
        ...                        libbfd: [ on  ]
        ...                        libcap: [ on  ]
        <SNIP>
        Makefile.config:664: No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev
        <SNIP>
      
      After installing it:
      
        $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf
        $ time make NO_SYSCALL_TABLE=1 O=/tmp/build/perf  -C tools/perf install-bin ; perf test python
        make: Entering directory '/home/acme/git/perf/tools/perf'
          BUILD:   Doing 'make -j12' parallel build
          HOSTCC   /tmp/build/perf/fixdep.o
          HOSTLD   /tmp/build/perf/fixdep-in.o
          LINK     /tmp/build/perf/fixdep
        Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'
        diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
        Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h'
        diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
        Warning: Kernel ABI header at 'tools/perf/util/hashmap.c' differs from latest version at 'tools/lib/bpf/hashmap.c'
        diff -u tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
      
        Auto-detecting system features:
        <SNIP>
        ...                      libaudit: [ on  ]
        ...                        libbfd: [ on  ]
        ...                        libcap: [ on  ]
        <SNIP>
        $ ldd ~/bin/perf | grep audit
        	libaudit.so.1 => /lib64/libaudit.so.1 (0x00007fc18978e000)
        $
      Acked-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Ingo Molnar <mingo@kernel.org>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Link: http://lore.kernel.org/lkml/20200529155552.463-3-acme@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      43de3869
    • A
      perf build: Group the NO_SYSCALL_TABLE logic · 9b90d973
      Arnaldo Carvalho de Melo 提交于
      To help in allowing to disable it from the make command line.
      Acked-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Link: http://lore.kernel.org/lkml/20200529155552.463-2-acme@kernel.org
      [ Fixed the logic for the filter part, it should be ifeq, not ifneq ]
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      9b90d973
  4. 28 5月, 2020 17 次提交
    • A
      perf intel-pt: Refine kernel decoding only warning message · 9b2d2066
      Adrian Hunter 提交于
      Stop the message displaying when user space is not being traced.
      
      Example:
      
        Prerequisites:
      
          sudo setcap "cap_sys_rawio,cap_sys_admin,cap_sys_ptrace,cap_syslog,cap_ipc_lock=ep" ~/bin/perf
          sudo chmod +r /proc/kcore
      
        Before:
      
          $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001
          Warning:
          Intel Processor Trace decoding will not be possible except for kernel tracing!
          [ perf record: Woken up 1 times to write data ]
          [ perf record: Captured and wrote 0.838 MB perf.data ]
      
        After:
      
          $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001
          [ perf record: Woken up 1 times to write data ]
          [ perf record: Captured and wrote 1.068 MB perf.data ]
      
          $ sudo chmod go-r /proc/kcore
          $ sudo setcap -r ~/bin/perf
      Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Link: http://lore.kernel.org/lkml/20200528120859.21604-2-adrian.hunter@intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      9b2d2066
    • A
      perf record: Respect --no-switch-events · 16b4b4e1
      Adrian Hunter 提交于
      Context switch events are added automatically by Intel PT and Coresight.
      
      Make it possible to suppress them. That is useful for tracing the
      scheduler without the disturbance that the switch event processing
      creates.
      
      Example:
      
        Prerequisites:
      
          $ which perf
          ~/bin/perf
          $ sudo setcap "cap_sys_rawio,cap_sys_admin,cap_sys_ptrace,cap_syslog,cap_ipc_lock=ep" ~/bin/perf
          $ sudo chmod +r /proc/kcore
      
        Before:
      
          $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001
          [ perf record: Woken up 1 times to write data ]
          [ perf record: Captured and wrote 0.938 MB perf.data ]
          $ perf script -D | grep PERF_RECORD_SWITCH | wc -l
          572
      
        After:
      
          $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001
          Warning:
          Intel Processor Trace decoding will not be possible except for kernel tracing!
          [ perf record: Woken up 1 times to write data ]
          [ perf record: Captured and wrote 0.838 MB perf.data ]
          $ perf script -D | grep PERF_RECORD_SWITCH | wc -l
          0
      
          $ sudo chmod go-r /proc/kcore
          $ sudo setcap -r ~/bin/perf
      Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Link: http://lore.kernel.org/lkml/20200528120859.21604-1-adrian.hunter@intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      16b4b4e1
    • A
      perf script: Fix --call-trace for Intel PT · b5164085
      Adrian Hunter 提交于
      Make process_attr() respect -F-ip, noting also that the condition in
      process_attr() (callchain_param.record_mode != CALLCHAIN_NONE) is always
      true so test the sample type directly.
      
      Example:
      
        Before:
      
          $ perf record -e intel_pt//u uname
          Linux
          [ perf record: Woken up 1 times to write data ]
          [ perf record: Captured and wrote 0.033 MB perf.data ]
          $ perf script --call-trace | head -5
                 uname 30992 [006] 41758.313696574:  cbr: 42 freq: 4219 MHz (156%)                    0 [unknown] ([unknown]                                         )
                 uname 30992 [006] 41758.313696907: _start                               7f71792c4100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )
                 uname 30992 [006] 41758.313699574:     _dl_start                        7f71792c4103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )
                 uname 30992 [006] 41758.313699907:     _dl_start                        7f71792c4e18 _dl_start+0x28 (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )
                 uname 30992 [006] 41758.313701574:     _dl_start                        7f71792c5128 _dl_start+0x338 (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )
      
        After:
      
          $ perf script --call-trace | head -5
                 uname 30992 [006] 41758.313696574:  cbr: 42 freq: 4219 MHz (156%)
                 uname 30992 [006] 41758.313696907: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )      _start
                 uname 30992 [006] 41758.313699574: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          _dl_start
                 uname 30992 [006] 41758.313699907: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          _dl_start
                 uname 30992 [006] 41758.313701574: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          _dl_start
      
      Fixes: f288e8e1aa4f ("perf script: Enable IP fields for callchains")
      Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Link: http://lore.kernel.org/lkml/20200527180250.16723-1-adrian.hunter@intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      b5164085
    • A
      perf evlist: Disable 'immediate' events last · 87cf8360
      Adrian Hunter 提交于
      Events marked as 'immediate' are started before other events to ensure
      that there is context at the start of the main tracing events. The same
      is true at the end of tracing, so disable 'immediate' events after other
      events.
      Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Borislav Petkov <bp@alien8.de>
      Cc: H. Peter Anvin <hpa@zytor.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Masami Hiramatsu <mhiramat@kernel.org>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
      Cc: x86@kernel.org
      Link: http://lore.kernel.org/lkml/20200512121922.8997-11-adrian.hunter@intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      87cf8360
    • A
      perf kcore_copy: Fix module map when there are no modules loaded · 61f82e3f
      Adrian Hunter 提交于
      In the absence of any modules, no "modules" map is created, but there
      are other executable pages to map, due to eBPF JIT, kprobe or ftrace.
      Map them by recognizing that the first "module" symbol is not
      necessarily from a module, and adjust the map accordingly.
      Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Borislav Petkov <bp@alien8.de>
      Cc: H. Peter Anvin <hpa@zytor.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Leo Yan <leo.yan@linaro.org>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Masami Hiramatsu <mhiramat@kernel.org>
      Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
      Cc: x86@kernel.org
      Link: http://lore.kernel.org/lkml/20200512121922.8997-10-adrian.hunter@intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      61f82e3f
    • N
      perf jvmti: Fix demangling Java symbols · 0bdf3181
      Nick Gasson 提交于
      For a Java method signature like:
      
          Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V
      
      The demangler produces:
      
          void class java.lang.AbstractStringBuilder.appendChars(class java.lang., shorttring., int, int)
      
      The arguments should be (java.lang.String, int, int) but the demangler
      interprets the "S" in String as the type code for "short". Correct this
      and two other minor things:
      
      - There is no "bool" type in Java, should be "boolean".
      
      - The demangler prepends "class" to every Java class name. This is not
        standard Java syntax and it wastes a lot of horizontal space if the
        signature is long. Remove this as there isn't any ambiguity between
        class names and primitives.
      
      Committer notes:
      
      This was split from a larger patch that also added a java demangler
      'perf test' entry, that, before this patch shows the error being fixed
      by it:
      
        $ perf test java
        65: Demangle Java                                         : FAILED!
        $ perf test -v java
        Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc
        65: Demangle Java                                         :
        --- start ---
        test child forked, pid 307264
        FAILED: Ljava/lang/StringLatin1;equals([B[B)Z: bool class java.lang.StringLatin1.equals(byte[], byte[]) != boolean java.lang.StringLatin1.equals(byte[], byte[])
        FAILED: Ljava/util/zip/ZipUtils;CENSIZ([BI)J: long class java.util.zip.ZipUtils.CENSIZ(byte[], int) != long java.util.zip.ZipUtils.CENSIZ(byte[], int)
        FAILED: Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z: bool class java.util.regex.Pattern$BmpCharProperty.match(class java.util.regex.Matcher., int, class java.lang., charhar, shortequence) != boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)
        FAILED: Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V: void class java.lang.AbstractStringBuilder.appendChars(class java.lang., shorttring., int, int) != void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)
        FAILED: Ljava/lang/Object;<init>()V: void class java.lang.Object<init>() != void java.lang.Object<init>()
        test child finished with -1
        ---- end ----
        Demangle Java: FAILED!
        $
      
      After applying this patch:
      
        $ perf test  java
        65: Demangle Java                                         : Ok
        $
      Signed-off-by: Nick Gasson <nick.gasson@arm.com>
      Reviewed-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Tested-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lore.kernel.org/lkml/20200427061520.24905-4-nick.gasson@arm.comSigned-off-by: NArnaldo Carvalho de Melo <acme@redhat.com>
      0bdf3181
    • N
      perf tests: Add test for the java demangler · 525c821d
      Nick Gasson 提交于
      Split from a larger patch that was also fixing a problem with the java
      demangler, so, before applying that patch we see:
      
        $ perf test java
        65: Demangle Java                                         : FAILED!
        $ perf test -v java
        65: Demangle Java                                         :
        --- start ---
        test child forked, pid 307264
        FAILED: Ljava/lang/StringLatin1;equals([B[B)Z: bool class java.lang.StringLatin1.equals(byte[], byte[]) != boolean java.lang.StringLatin1.equals(byte[], byte[])
        FAILED: Ljava/util/zip/ZipUtils;CENSIZ([BI)J: long class java.util.zip.ZipUtils.CENSIZ(byte[], int) != long java.util.zip.ZipUtils.CENSIZ(byte[], int)
        FAILED: Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z: bool class java.util.regex.Pattern$BmpCharProperty.match(class java.util.regex.Matcher., int, class java.lang., charhar, shortequence) != boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)
        FAILED: Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V: void class java.lang.AbstractStringBuilder.appendChars(class java.lang., shorttring., int, int) != void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)
        FAILED: Ljava/lang/Object;<init>()V: void class java.lang.Object<init>() != void java.lang.Object<init>()
        test child finished with -1
        ---- end ----
        Demangle Java: FAILED!
        $
      
      Next patch should fix this.
      Signed-off-by: Nick Gasson <nick.gasson@arm.com>
      Reviewed-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Tested-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lore.kernel.org/lkml/20200427061520.24905-4-nick.gasson@arm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      525c821d
    • N
      perf jvmti: Do not report error when missing debug information · 959f8ed4
      Nick Gasson 提交于
      If the Java sources are compiled with -g:none to disable debug
      information the perf JVMTI plugin reports a lot of errors like:
      
        java: GetLineNumberTable failed with JVMTI_ERROR_ABSENT_INFORMATION
        java: GetLineNumberTable failed with JVMTI_ERROR_ABSENT_INFORMATION
        java: GetLineNumberTable failed with JVMTI_ERROR_ABSENT_INFORMATION
        java: GetLineNumberTable failed with JVMTI_ERROR_ABSENT_INFORMATION
        java: GetLineNumberTable failed with JVMTI_ERROR_ABSENT_INFORMATION
      
      Instead if GetLineNumberTable returns JVMTI_ERROR_ABSENT_INFORMATION
      simply skip emitting line number information for that method. Unlike the
      previous patch these errors don't affect the jitdump generation, they
      just generate a lot of noise.
      
      Similarly for native methods which also don't have line tables.
      Signed-off-by: Nick Gasson <nick.gasson@arm.com>
      Reviewed-by: Ian Rogers <irogers@google.com>
      Tested-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lore.kernel.org/lkml/20200427061520.24905-3-nick.gasson@arm.com
      [ Moved || operator to the end of the line, not at the start of 2nd if condition ]
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      959f8ed4
    • N
      perf jvmti: Fix jitdump for methods without debug info · 953e9240
      Nick Gasson 提交于
      If a Java class is compiled with -g:none to omit debug information, the
      JVMTI plugin won't write jitdump entries for any method in this class
      and prints a lot of errors like:
      
          java: GetSourceFileName failed with JVMTI_ERROR_ABSENT_INFORMATION
      
      The call to GetSourceFileName is used to derive the file name `fn`, but
      this value is not actually used since commit ca58d7e6 ("perf jvmti:
      Generate correct debug information for inlined code") which moved the
      file name lookup into fill_source_filenames(). So the call to
      GetSourceFileName and related code can be safely removed.
      Signed-off-by: Nick Gasson <nick.gasson@arm.com>
      Reviewed-by: Ian Rogers <irogers@google.com>
      Tested-by: Ian Rogers <irogers@google.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Link: http://lore.kernel.org/lkml/20200427061520.24905-2-nick.gasson@arm.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      953e9240
    • A
      perf symbols: Fix debuginfo search for Ubuntu · 85afd355
      Adrian Hunter 提交于
      Reportedly, from 19.10 Ubuntu has begun mixing up the location of some
      debug symbol files, putting files expected to be in
      /usr/lib/debug/usr/lib into /usr/lib/debug/lib instead. Fix by adding
      another dso_binary_type.
      
      Example on Ubuntu 20.04
      
        Before:
      
          $ perf record -e intel_pt//u uname
          Linux
          [ perf record: Woken up 1 times to write data ]
          [ perf record: Captured and wrote 0.030 MB perf.data ]
          $ perf script --call-trace | head -5
                 uname 14003 [005] 15321.764958566:  cbr: 42 freq: 4219 MHz (156%)
                 uname 14003 [005] 15321.764958566: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          7f1e71cc4100
                 uname 14003 [005] 15321.764961566: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )              7f1e71cc4df0
                 uname 14003 [005] 15321.764961900: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )              7f1e71cc4e18
                 uname 14003 [005] 15321.764963233: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )              7f1e71cc5128
      
        After:
      
          $ perf script --call-trace | head -5
                 uname 14003 [005] 15321.764958566:  cbr: 42 freq: 4219 MHz (156%)
                 uname 14003 [005] 15321.764958566: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )      _start
                 uname 14003 [005] 15321.764961566: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          _dl_start
                 uname 14003 [005] 15321.764961900: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          _dl_start
                 uname 14003 [005] 15321.764963233: (/usr/lib/x86_64-linux-gnu/ld-2.31.so              )          _dl_start
      Reported-by: Travis Downs <travis.downs@gmail.com>
      Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: stable@vger.kernel.org
      Link: http://lore.kernel.org/lkml/20200526155207.9172-1-adrian.hunter@intel.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      85afd355
    • J
      perf parse: Add 'struct parse_events_state' pointer to scanner · 1244a327
      Jiri Olsa 提交于
      We need to pass more data to the scanner, so let's start by having it
      take a pointer to a 'struct parse_events_state' object instead of just
      the start token.
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Michael Petlan <mpetlan@redhat.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200524224219.234847-4-jolsa@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      1244a327
    • J
      perf stat: Do not pass avg to generic_metric · 5f09ca5a
      Jiri Olsa 提交于
      There's no need to pass the given evsel's count to metric data, because
      it will be pushed again within the following metric_events loop.
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Michael Petlan <mpetlan@redhat.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200524224219.234847-3-jolsa@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      5f09ca5a
    • J
      perf tests: Consider subtests when searching for user specified tests · d685e6c1
      Jiri Olsa 提交于
      It's now possible to put subtest name as a test filter:
      
        $ perf test 'PMU event table sanity'
        10: PMU events                                            :
        10.1: PMU event table sanity                              : Ok
      
      Committer testing:
      
      Before:
      
        $ perf test 'PMU event table sanity'
        $
      
      After:
      
        $ perf test 'PMU event table sanity'
        10: PMU events                                            :
        10.1: PMU event table sanity                              : Ok
        $
      Signed-off-by: Jiri Olsa <jolsa@kernel.org>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Ian Rogers <irogers@google.com>
      Cc: Michael Petlan <mpetlan@redhat.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200524224219.234847-2-jolsa@kernel.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      d685e6c1
    • I
      perf list: Add metrics to command line usage · a90a1c54
      Ian Rogers 提交于
      Before:
      
       Usage: perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
      
      After:
      
       Usage: perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
      
      Committer testing:
      
      Before and after we get these outputs on a Lenovo t480s (i7-8650U):
      
        # perf list metricgroup
      
        List of pre-defined events (to be used in -e):
      
        Metric Groups:
      
        BrMispredicts
        BrMispredicts_SMT
        Branches
        Cache_Misses
        DSB
        FLOPS
        FLOPS_SMT
        Fetch_BW
        IcMiss
        Instruction_Type
        Memory_BW
        Memory_Bound
        Memory_Lat
        No_group
        PGO
        Pipeline
        Power
        Retire
        SMT
        Summary
        TLB
        TLB_SMT
        TopDownL1
        TopDownL1_SMT
        TopdownL1
        TopdownL1_SMT
        #
      
        # perf list metric | head -11
      
        Metrics:
      
          Backend_Bound
               [This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend]
          Backend_Bound_SMT
               [This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU]
          Bad_Speculation
               [This category represents fraction of slots wasted due to incorrect speculations]
          Bad_Speculation_SMT
               [This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU]
        #
      Signed-off-by: Ian Rogers <irogers@google.com>
      Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Jiri Olsa <jolsa@redhat.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Stephane Eranian <eranian@google.com>
      Link: http://lore.kernel.org/lkml/20200522064546.164259-1-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      a90a1c54
    • A
      perf script: Don't force less for non tty output with --xed · 8c3e05c8
      Andi Kleen 提交于
      --xed currently forces less. When piping the output to other scripts
      this can waste a lot of CPU time because less is rather slow.
      I've seen it using up a full core on its own in a pipeline.
      Only force less when the output is actually a terminal.
      Signed-off-by: Andi Kleen <ak@linux.intel.com>
      Cc: Jiri Olsa <jolsa@kernel.org>
      Link: http://lore.kernel.org/lkml/20200522020914.527564-1-andi@firstfloor.org
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      8c3e05c8
    • I
      perf metricgroup: Remove unnecessary ',' from events · e2ce1059
      Ian Rogers 提交于
      Remove unnecessary commas from events before they are parsed. This
      avoids ',' being echoed by parse-events.l.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Acked-by: Jiri Olsa <jolsa@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Andrii Nakryiko <andriin@fb.com>
      Cc: Cong Wang <xiyou.wangcong@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: John Garry <john.garry@huawei.com>
      Cc: Kajol Jain <kjain@linux.ibm.com>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Kim Phillips <kim.phillips@amd.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Paul Clarke <pc@us.ibm.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Song Liu <songliubraving@fb.com>
      Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Vince Weaver <vincent.weaver@maine.edu>
      Cc: bpf@vger.kernel.org
      Cc: netdev@vger.kernel.org
      Link: http://lore.kernel.org/lkml/20200520182011.32236-8-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      e2ce1059
    • I
      perf metricgroup: Add options to not group or merge · 05530a79
      Ian Rogers 提交于
      Add --metric-no-group that causes all events within metrics to not be
      grouped. This can allow the event to get more time when multiplexed, but
      may also lower accuracy.
      Add --metric-no-merge option. By default events in different metrics may
      be shared if the group of events for one metric is the same or larger
      than that of the second. Sharing may increase or lower accuracy and so
      is now configurable.
      Signed-off-by: Ian Rogers <irogers@google.com>
      Acked-by: Jiri Olsa <jolsa@redhat.com>
      Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Andrii Nakryiko <andriin@fb.com>
      Cc: Cong Wang <xiyou.wangcong@gmail.com>
      Cc: Jin Yao <yao.jin@linux.intel.com>
      Cc: John Garry <john.garry@huawei.com>
      Cc: Kajol Jain <kjain@linux.ibm.com>
      Cc: Kan Liang <kan.liang@linux.intel.com>
      Cc: Kim Phillips <kim.phillips@amd.com>
      Cc: Mark Rutland <mark.rutland@arm.com>
      Cc: Namhyung Kim <namhyung@kernel.org>
      Cc: Paul Clarke <pc@us.ibm.com>
      Cc: Peter Zijlstra <peterz@infradead.org>
      Cc: Song Liu <songliubraving@fb.com>
      Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
      Cc: Stephane Eranian <eranian@google.com>
      Cc: Vince Weaver <vincent.weaver@maine.edu>
      Cc: bpf@vger.kernel.org
      Cc: netdev@vger.kernel.org
      Link: http://lore.kernel.org/lkml/20200520182011.32236-7-irogers@google.com
      Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
      05530a79