提交 d375e344 编写于 作者: A Alexei Starovoitov

Merge branch 'cg_skb_direct_pkt_access'

Song Liu says:

====================
Changes v7 -> v8:
1. Dynamically allocate the dummy sk to avoid race conditions.

Changes v6 -> v7:
1. Make dummy sk a global variable (test_run_sk).

Changes v5 -> v6:
1. Fixed dummy sk in bpf_prog_test_run_skb() as suggested by Eric Dumazet.

Changes v4 -> v5:
1. Replaced bpf_compute_and_save_data_pointers() with
   bpf_compute_and_save_data_end();
   Replaced bpf_restore_data_pointers() with bpf_restore_data_end().
2. Fixed indentation in test_verifier.c

Changes v3 -> v4:
1. Fixed crash issue reported by Alexei.

Changes v2 -> v3:
1. Added helper function bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
some __skb_buff data directly.
====================
Signed-off-by: NAlexei Starovoitov <ast@kernel.org>
......@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
cb->data_end = skb->data + skb_headlen(skb);
}
/* Similar to bpf_compute_data_pointers(), except that save orginal
* data in cb->data and cb->meta_data for restore.
*/
static inline void bpf_compute_and_save_data_end(
struct sk_buff *skb, void **saved_data_end)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
*saved_data_end = cb->data_end;
cb->data_end = skb->data + skb_headlen(skb);
}
/* Restore data saved by bpf_compute_data_pointers(). */
static inline void bpf_restore_data_end(
struct sk_buff *skb, void *saved_data_end)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
cb->data_end = saved_data_end;
}
static inline u8 *bpf_skb_cb(struct sk_buff *skb)
{
/* eBPF programs may read/write skb->cb[] area to transfer meta
......
......@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
{
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
void *saved_data_end;
struct cgroup *cgrp;
int ret;
......@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
/* compute pointers for the bpf prog */
bpf_compute_and_save_data_end(skb, &saved_data_end);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
bpf_prog_run_save_cb);
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
......
......@@ -10,6 +10,8 @@
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/sched/signal.h>
#include <net/sock.h>
#include <net/tcp.h>
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
......@@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
struct sock *sk;
void *data;
int ret;
......@@ -137,11 +140,21 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
sk = kzalloc(sizeof(struct sock), GFP_USER);
if (!sk) {
kfree(data);
return -ENOMEM;
}
sock_net_set(sk, current->nsproxy->net_ns);
sock_init_data(NULL, sk);
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
kfree(sk);
return -ENOMEM;
}
skb->sk = sk;
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
......@@ -159,6 +172,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
kfree_skb(skb);
kfree(sk);
return -ENOMEM;
}
}
......@@ -171,6 +185,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
size = skb_headlen(skb);
ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
kfree_skb(skb);
kfree(sk);
return ret;
}
......
......@@ -5352,6 +5352,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool cg_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, priority):
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
default:
return false;
}
}
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
}
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
......@@ -7044,7 +7078,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.is_valid_access = cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
......
......@@ -4862,6 +4862,177 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"direct packet read test#1 for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct __sk_buff, data)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct __sk_buff, data_end)),
BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
offsetof(struct __sk_buff, len)),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, pkt_type)),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
offsetof(struct __sk_buff, mark)),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct __sk_buff, queue_mapping)),
BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
offsetof(struct __sk_buff, protocol)),
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
offsetof(struct __sk_buff, vlan_present)),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"direct packet read test#2 for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
offsetof(struct __sk_buff, vlan_tci)),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, vlan_proto)),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, priority)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
offsetof(struct __sk_buff, priority)),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct __sk_buff,
ingress_ifindex)),
BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
offsetof(struct __sk_buff, tc_index)),
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
offsetof(struct __sk_buff, hash)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"direct packet read test#3 for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
offsetof(struct __sk_buff, cb[0])),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, cb[1])),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, cb[2])),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct __sk_buff, cb[3])),
BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
offsetof(struct __sk_buff, cb[4])),
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
offsetof(struct __sk_buff, napi_id)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
offsetof(struct __sk_buff, cb[0])),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
offsetof(struct __sk_buff, cb[1])),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
offsetof(struct __sk_buff, cb[2])),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
offsetof(struct __sk_buff, cb[3])),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
offsetof(struct __sk_buff, cb[4])),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"direct packet read test#4 for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct __sk_buff, family)),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
offsetof(struct __sk_buff, remote_ip4)),
BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
offsetof(struct __sk_buff, local_ip4)),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, remote_ip6[0])),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, remote_ip6[1])),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, remote_ip6[2])),
BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
offsetof(struct __sk_buff, remote_ip6[3])),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, local_ip6[0])),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, local_ip6[1])),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, local_ip6[2])),
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, local_ip6[3])),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct __sk_buff, remote_port)),
BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
offsetof(struct __sk_buff, local_port)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid access of tc_classid for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, tc_classid)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "invalid bpf_context access",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid access of data_meta for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, data_meta)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "invalid bpf_context access",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid access of flow_keys for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, flow_keys)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "invalid bpf_context access",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"invalid write access to napi_id for CGROUP_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
offsetof(struct __sk_buff, napi_id)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
offsetof(struct __sk_buff, napi_id)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "invalid bpf_context access",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
"valid cgroup storage access",
.insns = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册