提交 e980a076 编写于 作者: David S. Miller

Merge branch 'bpf-event-output-helper-improvements'

Daniel Borkmann says:

====================
BPF event output helper improvements

This set adds improvements to the BPF event output helper to
support non-linear data sampling, here specifically, for skb
context. For details please see individual patches. The set
is based against net-next tree.

v1 -> v2:
  - Integrated and adapted Peter's diff into patch 1, updated
    the remaining ones accordingly. Thanks Peter!
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -979,12 +979,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
struct pt_regs regs;
struct perf_sf_sde_regs *sde_regs;
struct perf_sample_data data;
struct perf_raw_record raw;
struct perf_raw_record raw = {
.frag = {
.size = sfr->size,
.data = sfr,
},
};
/* Setup perf sample */
perf_sample_data_init(&data, 0, event->hw.last_period);
raw.size = sfr->size;
raw.data = sfr;
data.raw = &raw;
/* Setup pt_regs to look like an CPU-measurement external interrupt
......
......@@ -655,8 +655,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
}
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.size = sizeof(u32) + ibs_data.size;
raw.data = ibs_data.data;
raw = (struct perf_raw_record){
.frag = {
.size = sizeof(u32) + ibs_data.size,
.data = ibs_data.data,
},
};
data.raw = &raw;
}
......
......@@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
const struct bpf_func_proto *bpf_get_event_output_proto(void);
/*
 * Copy callback used for the context part of an event: it is handed a
 * destination, a source and a length. The skb implementation in this
 * change (bpf_skb_copy) returns 0 on success and len on failure.
 */
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long len);
/*
 * Emit an event built from a meta buffer (meta/meta_size) and, when
 * ctx_size is non-zero, an additional chunk of context data (ctx/ctx_size)
 * that is copied through ctx_copy.
 */
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
......
......@@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx {
bool contexts_maxed;
};
/* Custom copy routine for a raw fragment, called as (dst, src, len). */
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
unsigned long len);
/*
 * One piece of a raw sample payload. Several pieces can be chained so the
 * payload does not have to live in a single linear buffer.
 */
struct perf_raw_frag {
union {
/* Link to the following fragment while the chain continues. */
struct perf_raw_frag *next;
/*
 * On the final fragment this slot holds the number of padding
 * bytes instead; perf_raw_frag_last() tells the two cases apart
 * by testing pad < sizeof(u64).
 */
unsigned long pad;
};
/* Optional copy callback; when NULL the output path uses __output_copy(). */
perf_copy_f copy;
/* Source of this fragment's bytes and how many of them there are. */
void *data;
u32 size;
} __packed;
struct perf_raw_record {
struct perf_raw_frag frag;
u32 size;
void *data;
};
/*
......@@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void);
static inline void perf_restore_debug_store(void) { }
#endif
/*
 * Tell whether @frag terminates its fragment chain.
 *
 * The next/pad union is read as an integer: a value below sizeof(u64) is
 * treated as a padding count, which marks the end of the chain.
 */
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
{
	const unsigned long link_or_pad = frag->pad;

	return link_or_pad < sizeof(u64);
}
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
/*
......
......@@ -401,6 +401,8 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/*
 * BPF_FUNC_perf_event_output for sk_buff input context: a 20-bit field in
 * bits 32-51 of the flags argument. bpf_skb_event_output() masks it out and
 * shifts it down by 32 to get the number of skb bytes to attach to the event.
 */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
......
......@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
return NULL;
}
const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
return NULL;
return -ENOTSUPP;
}
/* Always built-in helper functions. */
......
......@@ -5553,16 +5553,26 @@ void perf_output_sample(struct perf_output_handle *handle,
}
if (sample_type & PERF_SAMPLE_RAW) {
if (data->raw) {
u32 raw_size = data->raw->size;
u32 real_size = round_up(raw_size + sizeof(u32),
sizeof(u64)) - sizeof(u32);
u64 zero = 0;
perf_output_put(handle, real_size);
__output_copy(handle, data->raw->data, raw_size);
if (real_size - raw_size)
__output_copy(handle, &zero, real_size - raw_size);
struct perf_raw_record *raw = data->raw;
if (raw) {
struct perf_raw_frag *frag = &raw->frag;
perf_output_put(handle, raw->size);
do {
if (frag->copy) {
__output_custom(handle, frag->copy,
frag->data, frag->size);
} else {
__output_copy(handle, frag->data,
frag->size);
}
if (perf_raw_frag_last(frag))
break;
frag = frag->next;
} while (1);
if (frag->pad)
__output_skip(handle, NULL, frag->pad);
} else {
struct {
u32 size;
......@@ -5687,14 +5697,28 @@ void perf_prepare_sample(struct perf_event_header *header,
}
if (sample_type & PERF_SAMPLE_RAW) {
int size = sizeof(u32);
if (data->raw)
size += data->raw->size;
else
size += sizeof(u32);
struct perf_raw_record *raw = data->raw;
int size;
if (raw) {
struct perf_raw_frag *frag = &raw->frag;
u32 sum = 0;
do {
sum += frag->size;
if (perf_raw_frag_last(frag))
break;
frag = frag->next;
} while (1);
size = round_up(sum + sizeof(u32), sizeof(u64));
raw->size = size - sizeof(u32);
frag->pad = raw->size - sum;
} else {
size = sizeof(u64);
}
header->size += round_up(size, sizeof(u64));
header->size += size;
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
......@@ -7331,7 +7355,7 @@ static struct pmu perf_swevent = {
static int perf_tp_filter_match(struct perf_event *event,
struct perf_sample_data *data)
{
void *record = data->raw->data;
void *record = data->raw->frag.data;
/* only top level events have filters set */
if (event->parent)
......@@ -7387,8 +7411,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_event *event;
struct perf_raw_record raw = {
.size = entry_size,
.data = record,
.frag = {
.size = entry_size,
.data = record,
},
};
perf_sample_data_init(&data, 0, 0);
......
......@@ -123,10 +123,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb)
return rb->aux_nr_pages << PAGE_SHIFT;
}
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
static inline unsigned long \
func_name(struct perf_output_handle *handle, \
const void *buf, unsigned long len) \
#define __DEFINE_OUTPUT_COPY_BODY(memcpy_func) \
{ \
unsigned long size, written; \
\
......@@ -152,6 +149,17 @@ func_name(struct perf_output_handle *handle, \
return len; \
}
/*
 * Define a static inline output helper called func_name that takes
 * (handle, buf, len) and whose body is the shared
 * __DEFINE_OUTPUT_COPY_BODY expansion with memcpy_func as the copy routine.
 */
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
static inline unsigned long \
func_name(struct perf_output_handle *handle, \
const void *buf, unsigned long len) \
__DEFINE_OUTPUT_COPY_BODY(memcpy_func)
/*
 * Same body as the helpers generated by DEFINE_OUTPUT_COPY, except that the
 * copy routine is supplied by the caller through copy_func rather than being
 * fixed when the helper is defined.
 */
static inline unsigned long
__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
const void *buf, unsigned long len)
__DEFINE_OUTPUT_COPY_BODY(copy_func)
static inline unsigned long
memcpy_common(void *dst, const void *src, unsigned long n)
{
......
......@@ -233,24 +233,17 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type = ARG_ANYTHING,
};
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_raw_record *raw)
{
struct pt_regs *regs = (struct pt_regs *) (long) r1;
struct bpf_map *map = (struct bpf_map *) (long) r2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
void *data = (void *) (long) r4;
struct perf_sample_data sample_data;
struct bpf_event_entry *ee;
struct perf_event *event;
struct perf_raw_record raw = {
.size = size,
.data = data,
};
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
if (index == BPF_F_CURRENT_CPU)
index = cpu;
if (unlikely(index >= array->map.max_entries))
......@@ -269,11 +262,29 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
return -EOPNOTSUPP;
perf_sample_data_init(&sample_data, 0, 0);
sample_data.raw = &raw;
sample_data.raw = raw;
perf_event_output(event, &sample_data, regs);
return 0;
}
/*
 * Helper entry point taking raw u64 registers.
 *
 * r1 carries the pt_regs pointer, r2 the map pointer, r4 the data pointer
 * and size its length. The data is wrapped in a single-fragment raw record
 * and handed to __bpf_perf_event_output().
 *
 * Returns -EINVAL when flags has any bit set outside BPF_F_INDEX_MASK,
 * otherwise whatever __bpf_perf_event_output() returns.
 */
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
{
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = (void *)(long) r4,
		},
	};

	/* Only the index bits are meaningful for this variant. */
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return __bpf_perf_event_output((struct pt_regs *)(long) r1,
				       (struct bpf_map *)(long) r2,
				       flags, &raw);
}
static const struct bpf_func_proto bpf_perf_event_output_proto = {
.func = bpf_perf_event_output,
.gpl_only = true,
......@@ -287,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
.data = ctx,
};
struct perf_raw_record raw = {
.frag = {
.next = ctx_size ? &frag : NULL,
.size = meta_size,
.data = meta,
},
};
perf_fetch_caller_regs(regs);
return bpf_perf_event_output((long)regs, r2, flags, r4, size);
}
static const struct bpf_func_proto bpf_event_output_proto = {
.func = bpf_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_STACK,
.arg5_type = ARG_CONST_STACK_SIZE,
};
const struct bpf_func_proto *bpf_get_event_output_proto(void)
{
return &bpf_event_output_proto;
return __bpf_perf_event_output(regs, map, flags, &raw);
}
static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
......
......@@ -2025,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func)
return false;
}
/*
 * Copy callback that pulls the first @len bytes of @skb into @dst_buff.
 *
 * Returns 0 once the bytes are in @dst_buff, or @len when
 * skb_header_pointer() could not provide them.
 */
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
				  unsigned long len)
{
	void *src = skb_header_pointer(skb, 0, len, dst_buff);

	if (unlikely(!src))
		return len;

	/* The helper already filled dst_buff itself; nothing left to do. */
	if (src == dst_buff)
		return 0;

	/* The helper pointed somewhere else, so move the bytes over. */
	memcpy(dst_buff, src, len);
	return 0;
}
/*
 * Event output helper for programs whose context is an sk_buff.
 *
 * r1 carries the skb, r2 the map, r4 the meta buffer and meta_size its
 * length. The BPF_F_CTXLEN_MASK bits of flags give the number of skb bytes
 * to attach after the meta data.
 *
 * Returns -EINVAL for flag bits outside BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK,
 * -EFAULT when more bytes are requested than skb->len, otherwise the result
 * of bpf_event_output().
 */
static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
				u64 meta_size)
{
	const u64 allowed = BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK;
	struct sk_buff *skb = (struct sk_buff *)(long) r1;
	u64 ctx_len;

	if (unlikely(flags & ~allowed))
		return -EINVAL;

	/* Requested context length lives in the upper flag bits. */
	ctx_len = (flags & BPF_F_CTXLEN_MASK) >> 32;
	if (unlikely(ctx_len > skb->len))
		return -EFAULT;

	return bpf_event_output((struct bpf_map *)(long) r2, flags,
				(void *)(long) r4, meta_size,
				skb, ctx_len, bpf_skb_copy);
}
/*
 * Helper descriptor for bpf_skb_event_output(). The argument slots line up
 * with that function's parameters: r1 (cast to the skb), r2 (cast to the
 * map), flags, r4 (cast to the meta pointer) and meta_size.
 */
static const struct bpf_func_proto bpf_skb_event_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_STACK,
.arg5_type = ARG_CONST_STACK_SIZE,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
{
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
......@@ -2357,7 +2398,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_hash_recalc:
return &bpf_get_hash_recalc_proto;
case BPF_FUNC_perf_event_output:
return bpf_get_event_output_proto();
return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册