提交 c4dcfdd4 编写于 作者: Y YiFei Zhu 提交者: Alexei Starovoitov

bpf: Move getsockopt retval to struct bpf_cg_run_ctx

The retval value is moved to struct bpf_cg_run_ctx for ease of access
in different prog types with different context struct layouts. The
helper implementation (to be added in a later patch in the series) can
simply perform a container_of from current->bpf_ctx to retrieve
bpf_cg_run_ctx.

Unfortunately, there is no easy way to access the current task_struct
via the verifier BPF bytecode rewrite, aside from possibly calling a
helper, so a pointer to current task is added to struct bpf_sockopt_kern
so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with
an indirection.

For backward compatibility, if a getsockopt program rejects a syscall
by returning 0, an -EPERM will be generated, by having the
BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to
-EPERM. Unlike prior to this patch, this -EPERM will be visible to
ctx->retval for any other hooks down the line in the prog array.

Additionally, the restriction that getsockopt filters can only set
the retval to 0 is removed, considering that certain getsockopt
implementations may return optlen. Filters are now able to set the
value arbitrarily.
Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
上级 f10d0596
...@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {}; ...@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx { struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx; struct bpf_run_ctx run_ctx;
const struct bpf_prog_array_item *prog_item; const struct bpf_prog_array_item *prog_item;
int retval;
}; };
struct bpf_trace_run_ctx { struct bpf_trace_run_ctx {
...@@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); ...@@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
static __always_inline int static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog, const void *ctx, bpf_prog_run_fn run_prog,
u32 *ret_flags) int retval, u32 *ret_flags)
{ {
const struct bpf_prog_array_item *item; const struct bpf_prog_array_item *item;
const struct bpf_prog *prog; const struct bpf_prog *prog;
const struct bpf_prog_array *array; const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx; struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx; struct bpf_cg_run_ctx run_ctx;
int ret = 0;
u32 func_ret; u32 func_ret;
run_ctx.retval = retval;
migrate_disable(); migrate_disable();
rcu_read_lock(); rcu_read_lock();
array = rcu_dereference(array_rcu); array = rcu_dereference(array_rcu);
...@@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, ...@@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
run_ctx.prog_item = item; run_ctx.prog_item = item;
func_ret = run_prog(prog, ctx); func_ret = run_prog(prog, ctx);
if (!(func_ret & 1)) if (!(func_ret & 1))
ret = -EPERM; run_ctx.retval = -EPERM;
*(ret_flags) |= (func_ret >> 1); *(ret_flags) |= (func_ret >> 1);
item++; item++;
} }
bpf_reset_run_ctx(old_run_ctx); bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock(); rcu_read_unlock();
migrate_enable(); migrate_enable();
return ret; return run_ctx.retval;
} }
static __always_inline int static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog) const void *ctx, bpf_prog_run_fn run_prog,
int retval)
{ {
const struct bpf_prog_array_item *item; const struct bpf_prog_array_item *item;
const struct bpf_prog *prog; const struct bpf_prog *prog;
const struct bpf_prog_array *array; const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx; struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx; struct bpf_cg_run_ctx run_ctx;
int ret = 0;
run_ctx.retval = retval;
migrate_disable(); migrate_disable();
rcu_read_lock(); rcu_read_lock();
array = rcu_dereference(array_rcu); array = rcu_dereference(array_rcu);
...@@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, ...@@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
while ((prog = READ_ONCE(item->prog))) { while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item; run_ctx.prog_item = item;
if (!run_prog(prog, ctx)) if (!run_prog(prog, ctx))
ret = -EPERM; run_ctx.retval = -EPERM;
item++; item++;
} }
bpf_reset_run_ctx(old_run_ctx); bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock(); rcu_read_unlock();
migrate_enable(); migrate_enable();
return ret; return run_ctx.retval;
} }
static __always_inline u32 static __always_inline u32
...@@ -1394,7 +1396,7 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, ...@@ -1394,7 +1396,7 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
u32 _flags = 0; \ u32 _flags = 0; \
bool _cn; \ bool _cn; \
u32 _ret; \ u32 _ret; \
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \ _cn = _flags & BPF_RET_SET_CN; \
if (!_ret) \ if (!_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
......
...@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern { ...@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
s32 level; s32 level;
s32 optname; s32 optname;
s32 optlen; s32 optlen;
s32 retval; /* for retval in struct bpf_cg_run_ctx */
struct task_struct *current_task;
/* Temporary "register" for indirect stores to ppos. */
u64 tmp_reg;
}; };
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
......
...@@ -1079,7 +1079,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, ...@@ -1079,7 +1079,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else { } else {
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
__bpf_prog_run_save_cb); __bpf_prog_run_save_cb, 0);
} }
bpf_restore_data_end(skb, saved_data_end); bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset); __skb_pull(skb, offset);
...@@ -1108,7 +1108,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, ...@@ -1108,7 +1108,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
bpf_prog_run); bpf_prog_run, 0);
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
...@@ -1154,7 +1154,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, ...@@ -1154,7 +1154,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run, flags); bpf_prog_run, 0, flags);
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
...@@ -1181,7 +1181,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ...@@ -1181,7 +1181,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
bpf_prog_run); bpf_prog_run, 0);
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
...@@ -1199,7 +1199,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ...@@ -1199,7 +1199,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
rcu_read_lock(); rcu_read_lock();
cgrp = task_dfl_cgroup(current); cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run); bpf_prog_run, 0);
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
...@@ -1330,7 +1330,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, ...@@ -1330,7 +1330,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock(); rcu_read_lock();
cgrp = task_dfl_cgroup(current); cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run); ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run, 0);
rcu_read_unlock(); rcu_read_unlock();
kfree(ctx.cur_val); kfree(ctx.cur_val);
...@@ -1445,7 +1446,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -1445,7 +1446,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
lock_sock(sk); lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
&ctx, bpf_prog_run); &ctx, bpf_prog_run, 0);
release_sock(sk); release_sock(sk);
if (ret) if (ret)
...@@ -1509,7 +1510,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1509,7 +1510,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
.sk = sk, .sk = sk,
.level = level, .level = level,
.optname = optname, .optname = optname,
.retval = retval, .current_task = current,
}; };
int ret; int ret;
...@@ -1553,10 +1554,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1553,10 +1554,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
lock_sock(sk); lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
&ctx, bpf_prog_run); &ctx, bpf_prog_run, retval);
release_sock(sk); release_sock(sk);
if (ret) if (ret < 0)
goto out; goto out;
if (ctx.optlen > max_optlen || ctx.optlen < 0) { if (ctx.optlen > max_optlen || ctx.optlen < 0) {
...@@ -1564,14 +1565,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1564,14 +1565,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out; goto out;
} }
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval) {
ret = -EFAULT;
goto out;
}
if (ctx.optlen != 0) { if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) || if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) { put_user(ctx.optlen, optlen)) {
...@@ -1580,8 +1573,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1580,8 +1573,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
} }
} }
ret = ctx.retval;
out: out:
sockopt_free_buf(&ctx, &buf); sockopt_free_buf(&ctx, &buf);
return ret; return ret;
...@@ -1596,10 +1587,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, ...@@ -1596,10 +1587,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
.sk = sk, .sk = sk,
.level = level, .level = level,
.optname = optname, .optname = optname,
.retval = retval,
.optlen = *optlen, .optlen = *optlen,
.optval = optval, .optval = optval,
.optval_end = optval + *optlen, .optval_end = optval + *optlen,
.current_task = current,
}; };
int ret; int ret;
...@@ -1612,25 +1603,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, ...@@ -1612,25 +1603,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
*/ */
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
&ctx, bpf_prog_run); &ctx, bpf_prog_run, retval);
if (ret) if (ret < 0)
return ret; return ret;
if (ctx.optlen > *optlen) if (ctx.optlen > *optlen)
return -EFAULT; return -EFAULT;
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval)
return -EFAULT;
/* BPF programs can shrink the buffer, export the modifications. /* BPF programs can shrink the buffer, export the modifications.
*/ */
if (ctx.optlen != 0) if (ctx.optlen != 0)
*optlen = ctx.optlen; *optlen = ctx.optlen;
return ctx.retval; return ret;
} }
#endif #endif
...@@ -2046,10 +2031,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, ...@@ -2046,10 +2031,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
break; break;
case offsetof(struct bpf_sockopt, retval): case offsetof(struct bpf_sockopt, retval):
if (type == BPF_WRITE) BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
else if (type == BPF_WRITE) {
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); int treg = BPF_REG_9;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, current_task));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
treg, treg,
offsetof(struct task_struct, bpf_ctx));
*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
treg, si->src_reg,
offsetof(struct bpf_cg_run_ctx, retval));
*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
} else {
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sockopt_kern, current_task));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
si->dst_reg, si->dst_reg,
offsetof(struct task_struct, bpf_ctx));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
si->dst_reg, si->dst_reg,
offsetof(struct bpf_cg_run_ctx, retval));
}
break; break;
case offsetof(struct bpf_sockopt, optval): case offsetof(struct bpf_sockopt, optval):
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册