提交 b910eaaa 编写于 作者: Y Yonghong Song 提交者: Alexei Starovoitov

bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper

Jiri Olsa reported a bug ([1]) in kernel where cgroup local
storage pointer may be NULL in bpf_get_local_storage() helper.
There are two issues uncovered by this bug:
  (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
       before prog run,
  (2). due to change from preempt_disable to migrate_disable,
       preemption is possible and percpu storage might be overwritten
       by other tasks.

This issue (1) is fixed in [2]. This patch tried to address issue (2).
The following shows how things can go wrong:
  task 1:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 2:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 1:   run bpf program

task 1 will effectively use the percpu local storage setting by task 2
which will be either NULL or incorrect ones.

Instead of just one common local storage per cpu, this patch fixed
the issue by permitting 8 local storages per cpu and each local
storage is identified by a task_struct pointer. This way, we
allow at most 8 nested preemption between bpf_cgroup_storage_set()
and bpf_cgroup_storage_unset(). The percpu local storage slot
is released (calling bpf_cgroup_storage_unset()) by the same task
after bpf program finished running.
bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
interface.

The patch is tested on top of [2] with reproducer in [1].
Without this patch, kernel will emit error in 2-3 minutes.
With this patch, after one hour, still no error.

 [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
 [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.comSigned-off-by: NYonghong Song <yhs@fb.com>
Signed-off-by: NAlexei Starovoitov <ast@kernel.org>
Acked-by: NRoman Gushchin <guro@fb.com>
Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com
上级 a46410d5
...@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern; ...@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
struct bpf_cgroup_storage; struct bpf_cgroup_storage;
struct ctl_table; struct ctl_table;
struct ctl_table_header; struct ctl_table_header;
struct task_struct;
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
DECLARE_PER_CPU(struct bpf_cgroup_storage*, #define BPF_CGROUP_STORAGE_NEST_MAX 8
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
struct bpf_cgroup_storage_info {
struct task_struct *task;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};
/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
* to use bpf cgroup storage simultaneously.
*/
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
#define for_each_cgroup_storage_type(stype) \ #define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
...@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type( ...@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED; return BPF_CGROUP_STORAGE_SHARED;
} }
static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
*storage[MAX_BPF_CGROUP_STORAGE_TYPE]) *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{ {
enum bpf_cgroup_storage_type stype; enum bpf_cgroup_storage_type stype;
int i, err = 0;
preempt_disable();
for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
continue;
this_cpu_write(bpf_cgroup_storage_info[i].task, current);
for_each_cgroup_storage_type(stype)
this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
storage[stype]);
goto out;
}
err = -EBUSY;
WARN_ON_ONCE(1);
out:
preempt_enable();
return err;
}
static inline void bpf_cgroup_storage_unset(void)
{
int i;
for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
continue;
for_each_cgroup_storage_type(stype) this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); return;
}
} }
struct bpf_cgroup_storage * struct bpf_cgroup_storage *
...@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, ...@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL; return -EINVAL;
} }
static inline void bpf_cgroup_storage_set( static inline int bpf_cgroup_storage_set(
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; } struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
......
...@@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, ...@@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
/* BPF program asks to set CN on the packet. */ /* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0) #define BPF_RET_SET_CN (1 << 0)
/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
* if bpf_cgroup_storage_set() failed, the rest of programs
* will not execute. This should be a really rare scenario
* as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
* preemptions all between bpf_cgroup_storage_set() and
* bpf_cgroup_storage_unset() on the same cpu.
*/
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \ ({ \
struct bpf_prog_array_item *_item; \ struct bpf_prog_array_item *_item; \
...@@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, ...@@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
_array = rcu_dereference(array); \ _array = rcu_dereference(array); \
_item = &_array->items[0]; \ _item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \ while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
break; \
func_ret = func(_prog, ctx); \ func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \ _ret &= (func_ret & 1); \
*(ret_flags) |= (func_ret >> 1); \ *(ret_flags) |= (func_ret >> 1); \
bpf_cgroup_storage_unset(); \
_item++; \ _item++; \
} \ } \
rcu_read_unlock(); \ rcu_read_unlock(); \
...@@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, ...@@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
goto _out; \ goto _out; \
_item = &_array->items[0]; \ _item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \ while ((_prog = READ_ONCE(_item->prog))) { \
if (set_cg_storage) \ if (!set_cg_storage) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \ _ret &= func(_prog, ctx); \
_ret &= func(_prog, ctx); \ } else { \
if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
break; \
_ret &= func(_prog, ctx); \
bpf_cgroup_storage_unset(); \
} \
_item++; \ _item++; \
} \ } \
_out: \ _out: \
......
...@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { ...@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
}; };
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage*, DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{ {
...@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) ...@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
* verifier checks that its value is correct. * verifier checks that its value is correct.
*/ */
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
struct bpf_cgroup_storage *storage; struct bpf_cgroup_storage *storage = NULL;
void *ptr; void *ptr;
int i;
storage = this_cpu_read(bpf_cgroup_storage[stype]); for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
continue;
storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
break;
}
if (stype == BPF_CGROUP_STORAGE_SHARED) if (stype == BPF_CGROUP_STORAGE_SHARED)
ptr = &READ_ONCE(storage->buf)->data[0]; ptr = &READ_ONCE(storage->buf)->data[0];
......
...@@ -9,10 +9,11 @@ ...@@ -9,10 +9,11 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <uapi/linux/btf.h> #include <uapi/linux/btf.h>
DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
#include "../cgroup/cgroup-internal.h" #include "../cgroup/cgroup-internal.h"
#define LOCAL_STORAGE_CREATE_FLAG_MASK \ #define LOCAL_STORAGE_CREATE_FLAG_MASK \
......
...@@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, ...@@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
bpf_test_timer_enter(&t); bpf_test_timer_enter(&t);
do { do {
bpf_cgroup_storage_set(storage); ret = bpf_cgroup_storage_set(storage);
if (ret)
break;
if (xdp) if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx); *retval = bpf_prog_run_xdp(prog, ctx);
else else
*retval = BPF_PROG_RUN(prog, ctx); *retval = BPF_PROG_RUN(prog, ctx);
bpf_cgroup_storage_unset();
} while (bpf_test_timer_continue(&t, repeat, &ret, time)); } while (bpf_test_timer_continue(&t, repeat, &ret, time));
bpf_test_timer_leave(&t); bpf_test_timer_leave(&t);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册