提交 c4dcfdd4 编写于 作者: Y YiFei Zhu 提交者: Alexei Starovoitov

bpf: Move getsockopt retval to struct bpf_cg_run_ctx

The retval value is moved to struct bpf_cg_run_ctx for ease of access
in different prog types with different context structs layouts. The
helper implementation (to be added in a later patch in the series) can
simply perform a container_of from current->bpf_ctx to retrieve
bpf_cg_run_ctx.

Unfortunately, there is no easy way to access the current task_struct
via the verifier BPF bytecode rewrite, aside from possibly calling a
helper, so a pointer to current task is added to struct bpf_sockopt_kern
so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with
an indirection.

For backward compatibility, if a getsockopt program rejects a syscall
by returning 0, an -EPERM will be generated, by having the
BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to
-EPERM. Unlike prior to this patch, this -EPERM will be visible to
ctx->retval for any other hooks down the line in the prog array.

Additionally, the restriction that getsockopt filters can only set
the retval to 0 is removed, considering that certain getsockopt
implementations may return optlen. Filters are now able to set the
value arbitrarily.
Signed-off-by: NYiFei Zhu <zhuyifei@google.com>
Reviewed-by: NStanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.comSigned-off-by: NAlexei Starovoitov <ast@kernel.org>
上级 f10d0596
......@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx;
const struct bpf_prog_array_item *prog_item;
int retval;
};
struct bpf_trace_run_ctx {
......@@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog,
u32 *ret_flags)
int retval, u32 *ret_flags)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
int ret = 0;
u32 func_ret;
run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
......@@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
run_ctx.prog_item = item;
func_ret = run_prog(prog, ctx);
if (!(func_ret & 1))
ret = -EPERM;
run_ctx.retval = -EPERM;
*(ret_flags) |= (func_ret >> 1);
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
return ret;
return run_ctx.retval;
}
static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog)
const void *ctx, bpf_prog_run_fn run_prog,
int retval)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx;
int ret = 0;
run_ctx.retval = retval;
migrate_disable();
rcu_read_lock();
array = rcu_dereference(array_rcu);
......@@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item;
if (!run_prog(prog, ctx))
ret = -EPERM;
run_ctx.retval = -EPERM;
item++;
}
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
migrate_enable();
return ret;
return run_ctx.retval;
}
static __always_inline u32
......@@ -1394,7 +1396,7 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
u32 _flags = 0; \
bool _cn; \
u32 _ret; \
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \
if (!_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
......
......@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
s32 level;
s32 optname;
s32 optlen;
s32 retval;
/* for retval in struct bpf_cg_run_ctx */
struct task_struct *current_task;
/* Temporary "register" for indirect stores to ppos. */
u64 tmp_reg;
};
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
......
......@@ -1079,7 +1079,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else {
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
__bpf_prog_run_save_cb);
__bpf_prog_run_save_cb, 0);
}
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
......@@ -1108,7 +1108,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
bpf_prog_run);
bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
......@@ -1154,7 +1154,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run, flags);
bpf_prog_run, 0, flags);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
......@@ -1181,7 +1181,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
bpf_prog_run);
bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
......@@ -1199,7 +1199,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run);
bpf_prog_run, 0);
rcu_read_unlock();
return ret;
......@@ -1330,7 +1330,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run, 0);
rcu_read_unlock();
kfree(ctx.cur_val);
......@@ -1445,7 +1446,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
&ctx, bpf_prog_run);
&ctx, bpf_prog_run, 0);
release_sock(sk);
if (ret)
......@@ -1509,7 +1510,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
.sk = sk,
.level = level,
.optname = optname,
.retval = retval,
.current_task = current,
};
int ret;
......@@ -1553,10 +1554,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
&ctx, bpf_prog_run);
&ctx, bpf_prog_run, retval);
release_sock(sk);
if (ret)
if (ret < 0)
goto out;
if (ctx.optlen > max_optlen || ctx.optlen < 0) {
......@@ -1564,14 +1565,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval) {
ret = -EFAULT;
goto out;
}
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
......@@ -1580,8 +1573,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
}
}
ret = ctx.retval;
out:
sockopt_free_buf(&ctx, &buf);
return ret;
......@@ -1596,10 +1587,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
.sk = sk,
.level = level,
.optname = optname,
.retval = retval,
.optlen = *optlen,
.optval = optval,
.optval_end = optval + *optlen,
.current_task = current,
};
int ret;
......@@ -1612,25 +1603,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
*/
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
&ctx, bpf_prog_run);
if (ret)
&ctx, bpf_prog_run, retval);
if (ret < 0)
return ret;
if (ctx.optlen > *optlen)
return -EFAULT;
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval)
return -EFAULT;
/* BPF programs can shrink the buffer, export the modifications.
*/
if (ctx.optlen != 0)
*optlen = ctx.optlen;
return ctx.retval;
return ret;
}
#endif
......@@ -2046,10 +2031,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
break;
case offsetof(struct bpf_sockopt, retval):
if (type == BPF_WRITE)
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
else
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
if (type == BPF_WRITE) {
int treg = BPF_REG_9;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, current_task));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
treg, treg,
offsetof(struct task_struct, bpf_ctx));
*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
treg, si->src_reg,
offsetof(struct bpf_cg_run_ctx, retval));
*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
} else {
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sockopt_kern, current_task));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
si->dst_reg, si->dst_reg,
offsetof(struct task_struct, bpf_ctx));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
si->dst_reg, si->dst_reg,
offsetof(struct bpf_cg_run_ctx, retval));
}
break;
case offsetof(struct bpf_sockopt, optval):
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册