提交 12ef9bda 编写于 作者: D Daniel Borkmann

Merge branch 'bpf-perf-sample-addr'

Teng Qin says:

====================
These patches add support that allows bpf programs attached to perf events to
read the address values recorded with the perf events. These values are
requested by specifying sample_type with PERF_SAMPLE_ADDR when calling
perf_event_open().

The main motivation for these changes is to support building memory or lock
access profiling and tracing tools. For example on Intel CPUs, the recorded
address values for supported memory or lock access perf events would be
the access or lock target addresses from the PEBS buffer. Such information would
be very valuable for building tools that help understand memory access or
lock acquisition patterns.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
......@@ -13,6 +13,7 @@
/*
 * Context structure handed to BPF programs attached to perf events
 * (the sample program in this change receives it as its `ctx` argument).
 */
struct bpf_perf_event_data {
/* Register state exposed to the program. */
bpf_user_pt_regs_t regs;
/* NOTE(review): appears to be backed by perf_sample_data.period - confirm. */
__u64 sample_period;
/*
 * Address value recorded with the perf event; backed by
 * perf_sample_data.addr. Userspace requests it by setting
 * PERF_SAMPLE_ADDR in sample_type when calling perf_event_open().
 */
__u64 addr;
};
#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
......@@ -726,8 +726,7 @@ const struct bpf_prog_ops tracepoint_prog_ops = {
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
sample_period);
const int size_u64 = sizeof(u64);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
......@@ -738,8 +737,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
switch (off) {
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
bpf_ctx_record_field_size(info, size_sp);
if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
bpf_ctx_record_field_size(info, size_u64);
if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
case bpf_ctx_range(struct bpf_perf_event_data, addr):
bpf_ctx_record_field_size(info, size_u64);
if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
default:
......@@ -766,6 +770,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct perf_sample_data, period, 8,
target_size));
break;
case offsetof(struct bpf_perf_event_data, addr):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
data), si->dst_reg, si->src_reg,
offsetof(struct bpf_perf_event_data_kern, data));
*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
bpf_target_off(struct perf_sample_data, addr, 8,
target_size));
break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), si->dst_reg, si->src_reg,
......
......@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
......@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
if (ctx->addr != 0)
bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
......
......@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
struct perf_event_attr attr_type_raw_lock_load = {
.sample_freq = SAMPLE_FREQ,
.freq = 1,
.type = PERF_TYPE_RAW,
/* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
.config = 0x21d0,
/* Request to record lock address from PEBS */
.sample_type = PERF_SAMPLE_ADDR,
/* Recording the address value requires a precise event */
.precise_ip = 2,
};
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
......@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
printf("Test Lock Load\n");
test_perf_event_all_cpu(&attr_type_raw_lock_load);
test_perf_event_task(&attr_type_raw_lock_load);
printf("*** PASS ***\n");
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册