提交 0e25d14e 编写于 作者: D Daniel Borkmann

Merge branch 'bpf-xfrm-states'

Eyal Birger says:

====================
This patchset adds support for fetching XFRM state information from
an eBPF program called from TC.

The first patch introduces a helper for fetching an XFRM state from the
skb's secpath. The XFRM state is modeled using a new virtual struct which
contains the SPI, peer address, and reqid values of the state; this struct
can be extended in the future to provide additional state information.

The second patch adds a test example in test_tunnel_bpf.sh. The sample
validates the correct extraction of state information by the eBPF program.

v3:
  - Kept SPI and peer IPv4 address in state in network byte order
    following suggestion from Alexei Starovoitov
v2:
  - Fixed two comments by Daniel Borkmann:
    - disallow reserved flags in helper call
    - avoid compiling in helper code when CONFIG_XFRM is off
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
...@@ -774,6 +774,15 @@ union bpf_attr { ...@@ -774,6 +774,15 @@ union bpf_attr {
* @xdp_md: pointer to xdp_md * @xdp_md: pointer to xdp_md
* @delta: A negative integer to be added to xdp_md.data_end * @delta: A negative integer to be added to xdp_md.data_end
* Return: 0 on success or negative on error * Return: 0 on success or negative on error
*
* int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags)
* retrieve XFRM state
* @skb: pointer to skb
* @index: index of the xfrm state in the secpath
* @xfrm_state: pointer to 'struct bpf_xfrm_state'
* @size: size of 'struct bpf_xfrm_state'
* @flags: room for future extensions
* Return: 0 on success or negative error
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -841,7 +850,8 @@ union bpf_attr { ...@@ -841,7 +850,8 @@ union bpf_attr {
FN(msg_cork_bytes), \ FN(msg_cork_bytes), \
FN(msg_pull_data), \ FN(msg_pull_data), \
FN(bind), \ FN(bind), \
FN(xdp_adjust_tail), FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
...@@ -947,6 +957,19 @@ struct bpf_tunnel_key { ...@@ -947,6 +957,19 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* user accessible mirror of in-kernel xfrm_state.
 * new fields can only be added to the end of this structure
 *
 * Layout note: 'ext' names the two bytes that would otherwise be an
 * implicit compiler-inserted hole between 'family' and the 4-byte aligned
 * address union, so every byte of the structure is an addressable field.
 * Size and member offsets are the same as without it.
 */
struct bpf_xfrm_state {
	__u32 reqid;
	__u32 spi;	/* Stored in network byte order */
	__u16 family;
	__u16 ext;	/* Padding, future use. */
	union {
		__u32 remote_ipv4;	/* Stored in network byte order */
		__u32 remote_ipv6[4];	/* Stored in network byte order */
	};
};
/* Generic BPF return codes which all BPF program types may support. /* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to * The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include <net/sock_reuseport.h> #include <net/sock_reuseport.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/xfrm.h>
#include <linux/bpf_trace.h> #include <linux/bpf_trace.h>
/** /**
...@@ -3743,6 +3744,49 @@ static const struct bpf_func_proto bpf_bind_proto = { ...@@ -3743,6 +3744,49 @@ static const struct bpf_func_proto bpf_bind_proto = {
.arg3_type = ARG_CONST_SIZE, .arg3_type = ARG_CONST_SIZE,
}; };
#ifdef CONFIG_XFRM
/* bpf_skb_get_xfrm_state() - copy one XFRM state of @skb's secpath into @to.
 *
 * @skb:   packet whose secpath is inspected
 * @index: position of the wanted state in the secpath's xvec[] array
 * @to:    destination buffer supplied by the BPF program
 * @size:  size of @to; must equal sizeof(struct bpf_xfrm_state)
 * @flags: must be zero
 *
 * Fills reqid, spi, family and the address taken from the state's
 * props.saddr (IPv6 when family is AF_INET6, otherwise the IPv4 member).
 *
 * Return: 0 on success. On any failure (no secpath, @index out of range,
 * non-zero @flags, wrong @size) @size bytes of @to are zeroed and -EINVAL
 * is returned.
 */
BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
	   struct bpf_xfrm_state *, to, u32, size, u64, flags)
{
	const struct sec_path *sp = skb_sec_path(skb);
	const struct xfrm_state *x;

	if (!sp || unlikely(index >= sp->len || flags))
		goto err_clear;

	/* Validate the buffer size before touching the state at all. */
	if (unlikely(size != sizeof(struct bpf_xfrm_state)))
		goto err_clear;

	x = sp->xvec[index];

	/* The helper's prototype declares @to as uninitialised memory.
	 * Zero the whole structure first so that the padding after 'family'
	 * and, for non-IPv6 states, the unused tail of the address union are
	 * never handed back with stale contents.
	 */
	memset(to, 0, sizeof(*to));

	to->reqid = x->props.reqid;
	to->spi = x->id.spi;
	to->family = x->props.family;
	if (to->family == AF_INET6) {
		memcpy(to->remote_ipv6, x->props.saddr.a6,
		       sizeof(to->remote_ipv6));
	} else {
		to->remote_ipv4 = x->props.saddr.a4;
	}

	return 0;
err_clear:
	memset(to, 0, size);
	return -EINVAL;
}
/* Prototype descriptor for the bpf_skb_get_xfrm_state() helper.
 * The argument types below line up, in order, with the helper's
 * (skb, index, to, size, flags) parameters.
 */
static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
.func = bpf_skb_get_xfrm_state,
.gpl_only = false, /* helper is not marked GPL-only */
.ret_type = RET_INTEGER, /* 0 or -EINVAL */
.arg1_type = ARG_PTR_TO_CTX, /* skb */
.arg2_type = ARG_ANYTHING, /* index; range-checked by the helper itself */
.arg3_type = ARG_PTR_TO_UNINIT_MEM, /* to; caller need not initialise it */
.arg4_type = ARG_CONST_SIZE, /* size of the buffer passed as arg3 */
.arg5_type = ARG_ANYTHING, /* flags; helper rejects any non-zero value */
};
#endif
static const struct bpf_func_proto * static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id) bpf_base_func_proto(enum bpf_func_id func_id)
{ {
...@@ -3884,6 +3928,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -3884,6 +3928,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_proto; return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid: case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto; return &bpf_get_socket_uid_proto;
#ifdef CONFIG_XFRM
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
#endif
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
......
...@@ -593,4 +593,20 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb) ...@@ -593,4 +593,20 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
/* Fetch the first XFRM state (secpath index 0) attached to the packet and
 * log its reqid, SPI and IPv4 remote address through the trace pipe.
 * SPI and address are converted from network to host byte order for
 * printing. The packet is always accepted, whether or not a state exists.
 */
SEC("xfrm_get_state")
int _xfrm_get_state(struct __sk_buff *skb)
{
	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
	struct bpf_xfrm_state state;

	/* Nothing to report when the helper cannot provide a state. */
	if (bpf_skb_get_xfrm_state(skb, 0, &state, sizeof(state), 0) < 0)
		return TC_ACT_OK;

	bpf_trace_printk(fmt, sizeof(fmt), state.reqid,
			 bpf_ntohl(state.spi),
			 bpf_ntohl(state.remote_ipv4));

	return TC_ACT_OK;
}
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
...@@ -155,6 +155,57 @@ function add_ipip_tunnel { ...@@ -155,6 +155,57 @@ function add_ipip_tunnel {
ip addr add dev $DEV 10.1.1.200/24 ip addr add dev $DEV 10.1.1.200/24
} }
# Configure a symmetric ESP tunnel-mode IPsec setup between the at_ns0
# network namespace (outer address 172.16.1.100, inner 10.1.1.100) and the
# root namespace (outer address 172.16.1.200, inner 10.1.1.200).
# Both sides install the same two states (one per direction) and a matching
# policy for each direction, then add the inner address and a route to the
# peer's inner address.
# Sets the shell variables auth, enc, spi_in_to_out and spi_out_to_in.
function setup_xfrm_tunnel {
# Dummy keys built by repeating one hex digit: 40 digits for the
# hmac(sha1) key, 32 digits for the cbc(aes) key.
auth=0x$(printf '1%.0s' {1..40})
enc=0x$(printf '2%.0s' {1..32})
# One SPI per traffic direction.
spi_in_to_out=0x1
spi_out_to_in=0x2
# Inside the at_ns0 namespace
# Direction namespace -> root: state (SPI 0x1, reqid 1) and outbound policy
ip netns exec at_ns0 \
ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
spi $spi_in_to_out reqid 1 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip netns exec at_ns0 \
ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
mode tunnel
# Direction root -> namespace: state (SPI 0x2, reqid 2) and inbound policy
ip netns exec at_ns0 \
ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
spi $spi_out_to_in reqid 2 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip netns exec at_ns0 \
ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
mode tunnel
# Inner address on veth0 and route to the peer's inner address
ip netns exec at_ns0 \
ip addr add dev veth0 10.1.1.100/32
ip netns exec at_ns0 \
ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
src 10.1.1.100
# Root namespace (mirror image of the configuration above)
# Direction namespace -> root: same state, but the policy is inbound here
ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
spi $spi_in_to_out reqid 1 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
mode tunnel
# Direction root -> namespace: same state, but the policy is outbound here
ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
spi $spi_out_to_in reqid 2 mode tunnel \
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
mode tunnel
# Inner address on veth1 and route to the peer's inner address
ip addr add dev veth1 10.1.1.200/32
ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
}
function attach_bpf { function attach_bpf {
DEV=$1 DEV=$1
SET_TUNNEL=$2 SET_TUNNEL=$2
...@@ -278,6 +329,22 @@ function test_ipip { ...@@ -278,6 +329,22 @@ function test_ipip {
cleanup cleanup
} }
# End-to-end check of the xfrm_get_state BPF program.
# Builds the IPsec tunnel, attaches the program to veth1 ingress, sends one
# ping from at_ns0 and verifies that the program's trace output reports the
# namespace -> root state: reqid 1, SPI 0x1 and remote address 0xac100164
# (172.16.1.100).
function test_xfrm_tunnel {
	config_device
	tcpdump -nei veth1 ip &
	output=$(mktemp)
	# Keep 'cat' as the reader: cleanup stops it with 'pkill cat'.
	cat /sys/kernel/debug/tracing/trace_pipe | tee "$output" &
	setup_xfrm_tunnel
	tc qdisc add dev veth1 clsact
	tc filter add dev veth1 proto ip ingress bpf da obj tcbpf2_kern.o \
		sec xfrm_get_state
	ip netns exec at_ns0 ping -c 1 10.1.1.200
	# The trace reader runs in the background; give it a moment to copy
	# the program's output into $output before inspecting the file.
	sleep 1
	grep "reqid 1" "$output"
	grep "spi 0x1" "$output"
	grep "remote ip 0xac100164" "$output"
	cleanup
	# Do not leave the temporary capture file behind.
	rm -f "$output"
}
function cleanup { function cleanup {
set +ex set +ex
pkill iperf pkill iperf
...@@ -291,6 +358,8 @@ function cleanup { ...@@ -291,6 +358,8 @@ function cleanup {
ip link del geneve11 ip link del geneve11
ip link del erspan11 ip link del erspan11
ip link del ip6erspan11 ip link del ip6erspan11
ip x s flush
ip x p flush
pkill tcpdump pkill tcpdump
pkill cat pkill cat
set -ex set -ex
...@@ -316,4 +385,6 @@ echo "Testing GENEVE tunnel..." ...@@ -316,4 +385,6 @@ echo "Testing GENEVE tunnel..."
test_geneve test_geneve
echo "Testing IPIP tunnel..." echo "Testing IPIP tunnel..."
test_ipip test_ipip
echo "Testing IPSec tunnel..."
test_xfrm_tunnel
echo "*** PASS ***" echo "*** PASS ***"
...@@ -774,6 +774,15 @@ union bpf_attr { ...@@ -774,6 +774,15 @@ union bpf_attr {
* @xdp_md: pointer to xdp_md * @xdp_md: pointer to xdp_md
* @delta: A negative integer to be added to xdp_md.data_end * @delta: A negative integer to be added to xdp_md.data_end
* Return: 0 on success or negative on error * Return: 0 on success or negative on error
*
* int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags)
* retrieve XFRM state
* @skb: pointer to skb
* @index: index of the xfrm state in the secpath
* @xfrm_state: pointer to 'struct bpf_xfrm_state'
* @size: size of 'struct bpf_xfrm_state'
* @flags: room for future extensions
* Return: 0 on success or negative error
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -841,7 +850,8 @@ union bpf_attr { ...@@ -841,7 +850,8 @@ union bpf_attr {
FN(msg_cork_bytes), \ FN(msg_cork_bytes), \
FN(msg_pull_data), \ FN(msg_pull_data), \
FN(bind), \ FN(bind), \
FN(xdp_adjust_tail), FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
...@@ -946,6 +956,19 @@ struct bpf_tunnel_key { ...@@ -946,6 +956,19 @@ struct bpf_tunnel_key {
__u32 tunnel_label; __u32 tunnel_label;
}; };
/* user accessible mirror of in-kernel xfrm_state.
 * new fields can only be added to the end of this structure
 *
 * Layout note: 'ext' names the two bytes that would otherwise be an
 * implicit compiler-inserted hole between 'family' and the 4-byte aligned
 * address union, so every byte of the structure is an addressable field.
 * Size and member offsets are the same as without it.
 */
struct bpf_xfrm_state {
	__u32 reqid;
	__u32 spi;	/* Stored in network byte order */
	__u16 family;
	__u16 ext;	/* Padding, future use. */
	union {
		__u32 remote_ipv4;	/* Stored in network byte order */
		__u32 remote_ipv6[4];	/* Stored in network byte order */
	};
};
/* Generic BPF return codes which all BPF program types may support. /* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to * The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
......
...@@ -98,7 +98,9 @@ static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = ...@@ -98,7 +98,9 @@ static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind; (void *) BPF_FUNC_bind;
static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_tail; (void *) BPF_FUNC_xdp_adjust_tail;
/* Program-side stub for the skb_get_xfrm_state helper: a function pointer
 * whose value is the helper's BPF_FUNC_* id.
 * ctx is the program context, index selects the state in the secpath,
 * state points to a 'struct bpf_xfrm_state' of 'size' bytes to be filled,
 * and flags is reserved for future extensions.
 * Returns 0 on success or a negative error.
 */
static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
int size, int flags) =
(void *) BPF_FUNC_skb_get_xfrm_state;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册