提交 f198222f 编写于 作者: D Daniel Borkmann

Merge branch 'bpf-sendmsg-hook'

Andrey Ignatov says:

====================
v3 -> v4:
* handle static key correctly for CONFIG_CGROUP_BPF=n.

v2 -> v3:
* place BPF logic under static key in udp_sendmsg, udpv6_sendmsg;
* rebase.

v1 -> v2:
* return ENOTSUPP if bpf_prog rewrote IPv6-only with IPv4-mapped IPv6;
* add test for IPv4-mapped IPv6 use-case;
* fix build for CONFIG_CGROUP_BPF=n;
* rebase.

This path set adds BPF hooks for sys_sendmsg similar to existing hooks for
sys_bind and sys_connect.

Hooks allow to override source IP (including the case when it's set via
cmsg(3)) and destination IP:port for unconnected UDP (slow path). TCP and
connected UDP (fast path) are not affected. This makes UDP support
complete: connected UDP is handled by sys_connect hooks, unconnected by
sys_sendmsg ones.

Similar to sys_connect hooks, sys_sendmsg ones can be used to make system
calls such as sendmsg(2) and sendto(2) return EPERM.

Please see patch 0002 for more details.
====================
Signed-off-by: NDaniel Borkmann <daniel@iogearbox.net>
......@@ -66,7 +66,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type);
enum bpf_attach_type type,
void *t_ctx);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
......@@ -120,16 +121,18 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
NULL); \
__ret; \
})
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
t_ctx); \
release_sock(sk); \
} \
__ret; \
......@@ -151,10 +154,16 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
......@@ -185,6 +194,7 @@ struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define cgroup_bpf_enabled (0)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
......@@ -197,6 +207,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
......
......@@ -1010,6 +1010,7 @@ struct bpf_sock_addr_kern {
* only two (src and dst) are available at convert_ctx_access time
*/
u64 tmp_reg;
void *t_ctx; /* Attach type specific context. */
};
struct bpf_sock_ops_kern {
......
......@@ -160,6 +160,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_UDP4_SENDMSG,
BPF_CGROUP_UDP6_SENDMSG,
__MAX_BPF_ATTACH_TYPE
};
......@@ -2363,6 +2365,12 @@ struct bpf_sock_addr {
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
__u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
};
/* User bpf_sock_ops struct to access socket values and specify request ops
......
......@@ -500,6 +500,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
* @type: The type of program to be exectuted
* @t_ctx: Pointer to attach type specific context
*
* socket is expected to be of type INET or INET6.
*
......@@ -508,12 +509,15 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
*/
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type)
enum bpf_attach_type type,
void *t_ctx)
{
struct bpf_sock_addr_kern ctx = {
.sk = sk,
.uaddr = uaddr,
.t_ctx = t_ctx,
};
struct sockaddr_storage unspec;
struct cgroup *cgrp;
int ret;
......@@ -523,6 +527,11 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
return 0;
if (!ctx.uaddr) {
memset(&unspec, 0, sizeof(unspec));
ctx.uaddr = (struct sockaddr *)&unspec;
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
......
......@@ -1249,6 +1249,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
return 0;
default:
return -EINVAL;
......@@ -1565,6 +1567,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
......@@ -1635,6 +1639,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
......@@ -1692,6 +1698,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break;
......
......@@ -5299,6 +5299,7 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
break;
default:
return false;
......@@ -5308,6 +5309,24 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP6_SENDMSG:
break;
default:
return false;
}
break;
case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
switch (prog->expected_attach_type) {
case BPF_CGROUP_UDP4_SENDMSG:
break;
default:
return false;
}
break;
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
msg_src_ip6[3]):
switch (prog->expected_attach_type) {
case BPF_CGROUP_UDP6_SENDMSG:
break;
default:
return false;
......@@ -5318,6 +5337,9 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
msg_src_ip6[3]):
/* Only narrow read access allowed for now. */
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
......@@ -6072,6 +6094,23 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
SK_FL_PROTO_SHIFT);
break;
case offsetof(struct bpf_sock_addr, msg_src_ip4):
/* Treat t_ctx as struct in_addr for msg_src_ip4. */
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
struct bpf_sock_addr_kern, struct in_addr, t_ctx,
s_addr, BPF_SIZE(si->code), 0, tmp_reg);
break;
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
msg_src_ip6[3]):
off = si->off;
off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
/* Treat t_ctx as struct in6_addr for msg_src_ip6. */
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
break;
}
return insn - insn_buf;
......
......@@ -901,6 +901,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct udp_sock *up = udp_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
struct flowi4 fl4_stack;
struct flowi4 *fl4;
int ulen = len;
......@@ -955,8 +956,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/*
* Get and verify the address.
*/
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
if (usin) {
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET) {
......@@ -1010,6 +1010,22 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
if (cgroup_bpf_enabled && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
(struct sockaddr *)usin, &ipc.addr);
if (err)
goto out_free;
if (usin) {
if (usin->sin_port == 0) {
/* BPF program set invalid port. Reject it. */
err = -EINVAL;
goto out_free;
}
daddr = usin->sin_addr.s_addr;
dport = usin->sin_port;
}
}
saddr = ipc.addr;
ipc.addr = faddr = daddr;
......
......@@ -1316,6 +1316,29 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
if (cgroup_bpf_enabled && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6, &fl6.saddr);
if (err)
goto out_no_dst;
if (sin6) {
if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
/* BPF program rewrote IPv6-only by IPv4-mapped
* IPv6. It's currently unsupported.
*/
err = -ENOTSUPP;
goto out_no_dst;
}
if (sin6->sin6_port == 0) {
/* BPF program set invalid port. Reject it. */
err = -EINVAL;
goto out_no_dst;
}
fl6.fl6_dport = sin6->sin6_port;
fl6.daddr = sin6->sin6_addr;
}
}
final_p = fl6_update_dst(&fl6, opt, &final);
if (final_p)
connected = false;
......@@ -1395,6 +1418,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
out:
dst_release(dst);
out_no_dst:
fl6_sock_release(flowlabel);
txopt_put(opt_to_free);
if (!err)
......
......@@ -160,6 +160,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_UDP4_SENDMSG,
BPF_CGROUP_UDP6_SENDMSG,
__MAX_BPF_ATTACH_TYPE
};
......@@ -2363,6 +2365,12 @@ struct bpf_sock_addr {
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
__u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
};
/* User bpf_sock_ops struct to access socket values and specify request ops
......
......@@ -2043,6 +2043,8 @@ static const struct {
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
};
......
......@@ -34,7 +34,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
#define SRC2_IP4 0x00000000U
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
#define DST_REWRITE_IP4 0x7f000001U
#define DST_PORT 4040
#define DST_REWRITE_PORT4 4444
int _version SEC("version") = 1;
SEC("cgroup/sendmsg4")
int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_DGRAM)
return 0;
/* Rewrite source. */
if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
} else {
/* Unexpected source. Reject sendmsg. */
return 0;
}
/* Rewrite destination. */
if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
ctx->user_port == bpf_htons(DST_PORT)) {
ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
} else {
/* Unexpected source. Reject sendmsg. */
return 0;
}
return 1;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#define SRC_REWRITE_IP6_0 0
#define SRC_REWRITE_IP6_1 0
#define SRC_REWRITE_IP6_2 0
#define SRC_REWRITE_IP6_3 6
#define DST_REWRITE_IP6_0 0
#define DST_REWRITE_IP6_1 0
#define DST_REWRITE_IP6_2 0
#define DST_REWRITE_IP6_3 1
#define DST_REWRITE_PORT6 6666
int _version SEC("version") = 1;
SEC("cgroup/sendmsg6")
int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_DGRAM)
return 0;
/* Rewrite source. */
if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
ctx->msg_src_ip6[3] == bpf_htonl(0)) {
ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
} else {
/* Unexpected source. Reject sendmsg. */
return 0;
}
/* Rewrite destination. */
if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) &&
ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) {
ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
} else {
/* Unexpected destination. Reject sendmsg. */
return 0;
}
return 1;
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册