提交 77241217 编写于 作者: S Stanislav Fomichev 提交者: Alexei Starovoitov

bpf: Allow rewriting to ports under ip_unprivileged_port_start

At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
to a privileged one (< ip_unprivileged_port_start), but the bind is
then rejected later on in __inet_bind or __inet6_bind when the caller
lacks CAP_NET_BIND_SERVICE.

Let's add another return value to indicate that CAP_NET_BIND_SERVICE
check should be ignored. Use the same idea as we currently use
in cgroup/egress where bit #1 indicates CN. Instead, for
cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should
be bypassed.

v5:
- rename flags to be less confusing (Andrey Ignatov)
- rework BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY to work on flags
  and accept BPF_RET_SET_CN (no behavioral changes)

v4:
- Add missing IPv6 support (Martin KaFai Lau)

v3:
- Update description (Martin KaFai Lau)
- Fix capability restore in selftest (Martin KaFai Lau)

v2:
- Switch to explicit return code (Martin KaFai Lau)
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/20210127193140.3170382-1-sdf@google.com
上级 8063e184
...@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, ...@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr, struct sockaddr *uaddr,
enum bpf_attach_type type, enum bpf_attach_type type,
void *t_ctx); void *t_ctx,
u32 *flags);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops, struct bpf_sock_ops_kern *sock_ops,
...@@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, ...@@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \ ({ \
u32 __unused_flags; \
int __ret = 0; \ int __ret = 0; \
if (cgroup_bpf_enabled(type)) \ if (cgroup_bpf_enabled(type)) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
NULL); \ NULL, \
&__unused_flags); \
__ret; \ __ret; \
}) })
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \ ({ \
u32 __unused_flags; \
int __ret = 0; \ int __ret = 0; \
if (cgroup_bpf_enabled(type)) { \ if (cgroup_bpf_enabled(type)) { \
lock_sock(sk); \ lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
t_ctx); \ t_ctx, \
&__unused_flags); \
release_sock(sk); \ release_sock(sk); \
} \ } \
__ret; \ __ret; \
}) })
#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \ /* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL) * via upper bits of return code. The only flag that is supported
* (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \ * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL) */
#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
({ \
u32 __flags = 0; \
int __ret = 0; \
if (cgroup_bpf_enabled(type)) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
NULL, &__flags); \
release_sock(sk); \
if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
*bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
} \
__ret; \
})
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \ #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \ ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
...@@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, ...@@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
......
...@@ -1073,6 +1073,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, ...@@ -1073,6 +1073,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *include_prog, struct bpf_prog *include_prog,
struct bpf_prog_array **new_array); struct bpf_prog_array **new_array);
/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
u32 func_ret; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \
func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \
*(ret_flags) |= (func_ret >> 1); \
_item++; \
} \
rcu_read_unlock(); \
migrate_enable(); \
_ret; \
})
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \ ({ \
struct bpf_prog_array_item *_item; \ struct bpf_prog_array_item *_item; \
...@@ -1120,25 +1148,11 @@ _out: \ ...@@ -1120,25 +1148,11 @@ _out: \
*/ */
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \ ({ \
struct bpf_prog_array_item *_item; \ u32 _flags = 0; \
struct bpf_prog *_prog; \ bool _cn; \
struct bpf_prog_array *_array; \ u32 _ret; \
u32 ret; \ _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
u32 _ret = 1; \ _cn = _flags & BPF_RET_SET_CN; \
u32 _cn = 0; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \
ret = func(_prog, ctx); \
_ret &= (ret & 1); \
_cn |= (ret & 2); \
_item++; \
} \
rcu_read_unlock(); \
migrate_enable(); \
if (_ret) \ if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \ else \
......
...@@ -41,6 +41,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); ...@@ -41,6 +41,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
#define BIND_WITH_LOCK (1 << 1) #define BIND_WITH_LOCK (1 << 1)
/* Called from BPF program. */ /* Called from BPF program. */
#define BIND_FROM_BPF (1 << 2) #define BIND_FROM_BPF (1 << 2)
/* Skip CAP_NET_BIND_SERVICE check. */
#define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3)
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
u32 flags); u32 flags);
int inet_getname(struct socket *sock, struct sockaddr *uaddr, int inet_getname(struct socket *sock, struct sockaddr *uaddr,
......
...@@ -1055,6 +1055,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); ...@@ -1055,6 +1055,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* @uaddr: sockaddr struct provided by user * @uaddr: sockaddr struct provided by user
* @type: The type of program to be executed * @type: The type of program to be executed
* @t_ctx: Pointer to attach type specific context * @t_ctx: Pointer to attach type specific context
* @flags: Pointer to u32 which contains higher bits of BPF program
* return value (OR'ed together).
* *
* socket is expected to be of type INET or INET6. * socket is expected to be of type INET or INET6.
* *
...@@ -1064,7 +1066,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); ...@@ -1064,7 +1066,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr, struct sockaddr *uaddr,
enum bpf_attach_type type, enum bpf_attach_type type,
void *t_ctx) void *t_ctx,
u32 *flags)
{ {
struct bpf_sock_addr_kern ctx = { struct bpf_sock_addr_kern ctx = {
.sk = sk, .sk = sk,
...@@ -1087,7 +1090,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, ...@@ -1087,7 +1090,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
} }
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
BPF_PROG_RUN, flags);
return ret == 1 ? 0 : -EPERM; return ret == 1 ? 0 : -EPERM;
} }
......
...@@ -7986,6 +7986,9 @@ static int check_return_code(struct bpf_verifier_env *env) ...@@ -7986,6 +7986,9 @@ static int check_return_code(struct bpf_verifier_env *env)
env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
range = tnum_range(1, 1); range = tnum_range(1, 1);
if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
range = tnum_range(0, 3);
break; break;
case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SKB:
if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
......
...@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release); ...@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
int err; int err;
/* If the socket has its own bind function then use it. (RAW) */ /* If the socket has its own bind function then use it. (RAW) */
...@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog /* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught. * changes context in a wrong way it will be caught.
*/ */
err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr); err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
BPF_CGROUP_INET4_BIND, &flags);
if (err) if (err)
return err; return err;
return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK); return __inet_bind(sk, uaddr, addr_len, flags);
} }
EXPORT_SYMBOL(inet_bind); EXPORT_SYMBOL(inet_bind);
...@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, ...@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
snum = ntohs(addr->sin_port); snum = ntohs(addr->sin_port);
err = -EACCES; err = -EACCES;
if (snum && inet_port_requires_bind_service(net, snum) && if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out; goto out;
......
...@@ -295,7 +295,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, ...@@ -295,7 +295,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
return -EINVAL; return -EINVAL;
snum = ntohs(addr->sin6_port); snum = ntohs(addr->sin6_port);
if (snum && inet_port_requires_bind_service(net, snum) && if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES; return -EACCES;
...@@ -439,6 +440,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, ...@@ -439,6 +440,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
int err = 0; int err = 0;
/* If the socket has its own bind function then use it. */ /* If the socket has its own bind function then use it. */
...@@ -451,11 +453,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -451,11 +453,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog /* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught. * changes context in a wrong way it will be caught.
*/ */
err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr); err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
BPF_CGROUP_INET6_BIND, &flags);
if (err) if (err)
return err; return err;
return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK); return __inet6_bind(sk, uaddr, addr_len, flags);
} }
EXPORT_SYMBOL(inet6_bind); EXPORT_SYMBOL(inet6_bind);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册