ipv6: sr: Add seg6local action End.BPF

This patch adds the End.BPF action to the LWT seg6local infrastructure. This action works like any other seg6local End action, meaning that an IPv6 header with SRH is needed, whose DA has to be equal to the SID of the action. It will also advance the SRH to the next segment, the BPF program does not have to take care of this. Since the BPF program may not be a source of instability in the kernel, it is important to ensure that the integrity of the packet is maintained before yielding it back to the IPv6 layer. The hook hence keeps track if the SRH has been altered through the helpers, and re-validates its content if needed with seg6_validate_srh. The state kept for validation is stored in a per-CPU buffer. The BPF program is not allowed to directly write into the packet, and only some fields of the SRH can be altered through the helper bpf_lwt_seg6_store_bytes. Performances profiling has shown that the SRH re-validation does not induce a significant overhead. If the altered SRH is deemed as invalid, the packet is dropped. This validation is also done before executing any action through bpf_lwt_seg6_action, and will not be performed again if the SRH is not modified after calling the action. The BPF program may return 3 types of return codes: - BPF_OK: the End.BPF action will look up the next destination through seg6_lookup_nexthop. - BPF_REDIRECT: if an action has been executed through the bpf_lwt_seg6_action helper, the BPF program should return this value, as the skb's destination is already set and the default lookup should not be performed. - BPF_DROP : the packet will be dropped. Signed-off-by: N Mathieu Xhonneux <m.xhonneux@gmail.com> Acked-by: N David Lebrun <dlebrun@google.com> Signed-off-by: N Daniel Borkmann <daniel@iogearbox.net>

ipv6: sr: Add seg6local action End.BPF
This patch adds the End.BPF action to the LWT seg6local infrastructure. This action works like any other seg6local End action, meaning that an IPv6 header with SRH is needed, whose DA has to be equal to the SID of the action. It will also advance the SRH to the next segment, the BPF program does not have to take care of this. Since the BPF program may not be a source of instability in the kernel, it is important to ensure that the integrity of the packet is maintained before yielding it back to the IPv6 layer. The hook hence keeps track if the SRH has been altered through the helpers, and re-validates its content if needed with seg6_validate_srh. The state kept for validation is stored in a per-CPU buffer. The BPF program is not allowed to directly write into the packet, and only some fields of the SRH can be altered through the helper bpf_lwt_seg6_store_bytes. Performances profiling has shown that the SRH re-validation does not induce a significant overhead. If the altered SRH is deemed as invalid, the packet is dropped. This validation is also done before executing any action through bpf_lwt_seg6_action, and will not be performed again if the SRH is not modified after calling the action. The BPF program may return 3 types of return codes: - BPF_OK: the End.BPF action will look up the next destination through seg6_lookup_nexthop. - BPF_REDIRECT: if an action has been executed through the bpf_lwt_seg6_action helper, the BPF program should return this value, as the skb's destination is already set and the default lookup should not be performed. - BPF_DROP : the packet will be dropped. Signed-off-by: N Mathieu Xhonneux <m.xhonneux@gmail.com> Acked-by: N David Lebrun <dlebrun@google.com> Signed-off-by: N Daniel Borkmann <daniel@iogearbox.net>
004d4b27 · Mathieu Xhonneux · Daniel Borkmann · cd3092c7 · 004d4b27 · 004d4b27
7 changed file
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -12,6 +12,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)

--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -141,6 +141,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 };

 enum bpf_attach_type {

--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -25,6 +25,7 @@ enum {
 	SEG6_LOCAL_NH6,
 	SEG6_LOCAL_IIF,
 	SEG6_LOCAL_OIF,
+	SEG6_LOCAL_BPF,
 	__SEG6_LOCAL_MAX,
 };
 #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
@@ -59,10 +60,21 @@ enum {
 	SEG6_LOCAL_ACTION_END_AS	= 13,
 	/* forward to SR-unaware VNF with masquerading */
 	SEG6_LOCAL_ACTION_END_AM	= 14,
+	/* custom BPF action */
+	SEG6_LOCAL_ACTION_END_BPF	= 15,

 	__SEG6_LOCAL_ACTION_MAX,
 };

 #define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)

+enum {
+	SEG6_LOCAL_BPF_PROG_UNSPEC,
+	SEG6_LOCAL_BPF_PROG,
+	SEG6_LOCAL_BPF_PROG_NAME,
+	__SEG6_LOCAL_BPF_PROG_MAX,
+};
+
+#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+
 #endif
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	switch (env->prog->type) {
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
+	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 		/* dst_input() and dst_output() can't write for now */
 		if (t == BPF_WRITE)
 			return false;

--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4921,6 +4921,21 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }

+static const struct bpf_func_proto *
+lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_lwt_seg6_store_bytes:
+		return &bpf_lwt_seg6_store_bytes_proto;
+	case BPF_FUNC_lwt_seg6_action:
+		return &bpf_lwt_seg6_action_proto;
+	case BPF_FUNC_lwt_seg6_adjust_srh:
+		return &bpf_lwt_seg6_adjust_srh_proto;
+	default:
+		return lwt_out_func_proto(func_id, prog);
+	}
+}
+
 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
 				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
@@ -6629,6 +6644,16 @@ const struct bpf_prog_ops lwt_xmit_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };

+const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
+	.get_func_proto		= lwt_seg6local_func_proto,
+	.is_valid_access	= lwt_is_valid_access,
+	.convert_ctx_access	= bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_seg6local_prog_ops = {
+	.test_run		= bpf_prog_test_run_skb,
+};
+
 const struct bpf_verifier_ops cg_sock_verifier_ops = {
 	.get_func_proto		= sock_filter_func_proto,
 	.is_valid_access	= sock_filter_is_valid_access,

--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
 /*
 *  SR-IPv6 implementation
 *
- *  Author:
+ *  Authors:
 *  David Lebrun <david.lebrun@uclouvain.be>
+ *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
@@ -32,6 +33,7 @@
 #endif
 #include <net/seg6_local.h>
 #include <linux/etherdevice.h>
+#include <linux/bpf.h>

 struct seg6_local_lwt;

@@ -42,6 +44,11 @@ struct seg6_action_desc {
 	int static_headroom;
 };

+struct bpf_lwt_prog {
+	struct bpf_prog *prog;
+	char *name;
+};
+
 struct seg6_local_lwt {
 	int action;
 	struct ipv6_sr_hdr *srh;
@@ -50,6 +57,7 @@ struct seg6_local_lwt {
 	struct in6_addr nh6;
 	int iif;
 	int oif;
+	struct bpf_lwt_prog bpf;

 	int headroom;
 	struct seg6_action_desc *desc;
@@ -451,6 +459,69 @@ static int input_action_end_b6_encap(struct sk_buff *skb,

 DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);

+static int input_action_end_bpf(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
+	struct seg6_bpf_srh_state *srh_state =
+		this_cpu_ptr(&seg6_bpf_srh_states);
+	struct seg6_bpf_srh_state local_srh_state;
+	struct ipv6_sr_hdr *srh;
+	int srhoff = 0;
+	int ret;
+
+	srh = get_and_validate_srh(skb);
+	if (!srh)
+		goto drop;
+	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
+	 * which is also accessed by the bpf_lwt_seg6_* helpers
+	 */
+	preempt_disable();
+	srh_state->hdrlen = srh->hdrlen << 3;
+	srh_state->valid = 1;
+
+	rcu_read_lock();
+	bpf_compute_data_pointers(skb);
+	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
+	rcu_read_unlock();
+
+	local_srh_state = *srh_state;
+	preempt_enable();
+
+	switch (ret) {
+	case BPF_OK:
+	case BPF_REDIRECT:
+		break;
+	case BPF_DROP:
+		goto drop;
+	default:
+		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
+		goto drop;
+	}
+
+	if (unlikely((local_srh_state.hdrlen & 7) != 0))
+		goto drop;
+
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		goto drop;
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+	srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
+
+	if (!local_srh_state.valid &&
+	    unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+		goto drop;
+
+	if (ret != BPF_REDIRECT)
+		seg6_lookup_nexthop(skb, NULL, 0);
+
+	return dst_input(skb);
+
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
 static struct seg6_action_desc seg6_action_table[] = {
 	{
 		.action		= SEG6_LOCAL_ACTION_END,
@@ -497,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
 		.attrs		= (1 << SEG6_LOCAL_SRH),
 		.input		= input_action_end_b6_encap,
 		.static_headroom	= sizeof(struct ipv6hdr),
-	}
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_BPF,
+		.attrs		= (1 << SEG6_LOCAL_BPF),
+		.input		= input_action_end_bpf,
+	},
+
 };

 static struct seg6_action_desc *__get_action_desc(int action)
@@ -542,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
 				    .len = sizeof(struct in6_addr) },
 	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
 	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
 };

 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@@ -719,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
 	return 0;
 }

+#define MAX_PROG_NAME 256
+static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
+	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
+	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
+				       .len = MAX_PROG_NAME },
+};
+
+static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
+	struct bpf_prog *p;
+	int ret;
+	u32 fd;
+
+	ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
+			       attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
+		return -EINVAL;
+
+	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
+	if (!slwt->bpf.name)
+		return -ENOMEM;
+
+	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
+	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
+	if (IS_ERR(p)) {
+		kfree(slwt->bpf.name);
+		return PTR_ERR(p);
+	}
+
+	slwt->bpf.prog = p;
+	return 0;
+}
+
+static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	struct nlattr *nest;
+
+	if (!slwt->bpf.prog)
+		return 0;
+
+	nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
+		return -EMSGSIZE;
+
+	if (slwt->bpf.name &&
+	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
+		return -EMSGSIZE;
+
+	return nla_nest_end(skb, nest);
+}
+
+static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	if (!a->bpf.name && !b->bpf.name)
+		return 0;
+
+	if (!a->bpf.name || !b->bpf.name)
+		return 1;
+
+	return strcmp(a->bpf.name, b->bpf.name);
+}
+
 struct seg6_action_param {
 	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
 	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
@@ -749,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
 	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
 				    .put = put_nla_oif,
 				    .cmp = cmp_nla_oif },
+
+	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
+				    .put = put_nla_bpf,
+				    .cmp = cmp_nla_bpf },
+
 };

 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@@ -834,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
 	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);

 	kfree(slwt->srh);
+
+	if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
+		kfree(slwt->bpf.name);
+		bpf_prog_put(slwt->bpf.prog);
+	}
+
+	return;
 }

 static int seg6_local_fill_encap(struct sk_buff *skb,
@@ -886,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
 	if (attrs & (1 << SEG6_LOCAL_OIF))
 		nlsize += nla_total_size(4);

+	if (attrs & (1 << SEG6_LOCAL_BPF))
+		nlsize += nla_total_size(sizeof(struct nlattr)) +
+		       nla_total_size(MAX_PROG_NAME) +
+		       nla_total_size(4);
+
 	return nlsize;
 }


--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
 	case BPF_PROG_TYPE_LWT_XMIT:
+	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 	case BPF_PROG_TYPE_SOCK_OPS:
 	case BPF_PROG_TYPE_SK_SKB:
 	case BPF_PROG_TYPE_CGROUP_DEVICE: