diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 9ecde517728c317ac7428efd616536a0a90f301e..2793d4eac55fd7e12993b3045657fd4156499d06 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -92,6 +92,14 @@ Values : 0 - disable JIT kallsyms export (default value) 1 - enable JIT kallsyms export for privileged users only +bpf_jit_limit +------------- + +This enforces a global limit for memory allocations to the BPF JIT +compiler in order to reject unprivileged JIT requests once it has +been surpassed. bpf_jit_limit contains the value of the global limit +in bytes. + dev_weight -------------- diff --git a/include/linux/filter.h b/include/linux/filter.h index 91b4c934f02e7ca0f8aa3c8dfcc6f51233d7d05e..de629b706d1d7dda246819ed37109f098c05785e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -854,6 +854,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; +extern int bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 378cef70341c4e5b4b137bac7ed84aab75529af0..ee4c82667d659019022f42cfc3c4a13cf9fc9124 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2067,56 +2067,47 @@ static int btf_check_sec_info(struct btf_verifier_env *env, return 0; } -static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data, - u32 btf_data_size) +static int btf_parse_hdr(struct btf_verifier_env *env) { + u32 hdr_len, hdr_copy, btf_data_size; const struct btf_header *hdr; - u32 hdr_len, hdr_copy; - /* - * Minimal part of the "struct btf_header" that - * contains the hdr_len. - */ - struct btf_min_header { - u16 magic; - u8 version; - u8 flags; - u32 hdr_len; - } __user *min_hdr; struct btf *btf; int err; btf = env->btf; - min_hdr = btf_data; + btf_data_size = btf->data_size; - if (btf_data_size < sizeof(*min_hdr)) { + if (btf_data_size < + offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) { btf_verifier_log(env, "hdr_len not found"); return -EINVAL; } - if (get_user(hdr_len, &min_hdr->hdr_len)) - return -EFAULT; - + hdr = btf->data; + hdr_len = hdr->hdr_len; if (btf_data_size < hdr_len) { btf_verifier_log(env, "btf_header not found"); return -EINVAL; } - err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len); - if (err) { - if (err == -E2BIG) - btf_verifier_log(env, "Unsupported btf_header"); - return err; + /* Ensure the unsupported header fields are zero */ + if (hdr_len > sizeof(btf->hdr)) { + u8 *expected_zero = btf->data + sizeof(btf->hdr); + u8 *end = btf->data + hdr_len; + + for (; expected_zero < end; expected_zero++) { + if (*expected_zero) { + btf_verifier_log(env, "Unsupported btf_header"); + return -E2BIG; + } + } } hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr)); - if (copy_from_user(&btf->hdr, btf_data, hdr_copy)) - return -EFAULT; + memcpy(&btf->hdr, btf->data, hdr_copy); hdr = &btf->hdr; - if (hdr->hdr_len != hdr_len) - return -EINVAL; - btf_verifier_log_hdr(env, btf_data_size); if (hdr->magic != BTF_MAGIC) { @@ -2186,10 +2177,6 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, } env->btf = btf; - err = btf_parse_hdr(env, btf_data, btf_data_size); - if (err) - goto errout; - data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); if (!data) { err = -ENOMEM; @@ -2198,13 +2185,18 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, btf->data = data; btf->data_size = btf_data_size; - btf->nohdr_data = btf->data + btf->hdr.hdr_len; if (copy_from_user(data, btf_data, btf_data_size)) { err = -EFAULT; goto errout; } + err = btf_parse_hdr(env); + if (err) + goto errout; + + btf->nohdr_data = btf->data + btf->hdr.hdr_len; + err = btf_parse_str_sec(env); if (err) goto errout; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7c7eeea8cffcc38124870ff1a8dc350cad393ac2..6377225b208204c1c2d8829a778f50ebaa7d816d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,10 +365,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT +# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) + /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; +int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -577,27 +580,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, return ret; } +static atomic_long_t bpf_jit_current; + +#if defined(MODULES_VADDR) +static int __init bpf_jit_charge_init(void) +{ + /* Only used as heuristic here to derive limit. */ + bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, + PAGE_SIZE), INT_MAX); + return 0; +} +pure_initcall(bpf_jit_charge_init); +#endif + +static int bpf_jit_charge_modmem(u32 pages) +{ + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { + if (!capable(CAP_SYS_ADMIN)) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } + } + + return 0; +} + +static void bpf_jit_uncharge_modmem(u32 pages) +{ + atomic_long_sub(pages, &bpf_jit_current); +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_binary_header *hdr; - unsigned int size, hole, start; + u32 size, hole, start, pages; /* Most of BPF filters are really small, but if some of them * fill a page, allow at least 128 extra bytes to insert a * random section of illegal instructions. */ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + pages = size / PAGE_SIZE; + + if (bpf_jit_charge_modmem(pages)) + return NULL; hdr = module_alloc(size); - if (hdr == NULL) + if (!hdr) { + bpf_jit_uncharge_modmem(pages); return NULL; + } /* Fill space with illegal/arch-dep instructions. */ bpf_fill_ill_insns(hdr, size); - hdr->pages = size / PAGE_SIZE; + hdr->pages = pages; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -610,7 +650,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { + u32 pages = hdr->pages; + module_memfree(hdr); + bpf_jit_uncharge_modmem(pages); } /* This symbol is only overridden by archs that have different diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 141710b82a6c4b117ecc819c856921f416a7aaf6..191b79948424f4b21b7aa120abc03801264bf0a6 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -512,8 +512,7 @@ static int dev_map_notification(struct notifier_block *notifier, struct bpf_dtab_netdev *dev, *odev; dev = READ_ONCE(dtab->netdev_map[i]); - if (!dev || - dev->dev->ifindex != netdev->ifindex) + if (!dev || netdev != dev->dev) continue; odev = cmpxchg(&dtab->netdev_map[i], dev, NULL); if (dev == odev) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ab0d5e3f9892bc7f01ef6a5609102d25617dda85..a74972b07e7467deb018853b554027faf20c26d8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -99,7 +99,6 @@ BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) const struct bpf_func_proto bpf_map_pop_elem_proto = { .func = bpf_map_pop_elem, .gpl_only = false, - .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, @@ -113,7 +112,6 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) const struct bpf_func_proto bpf_map_peek_elem_proto = { .func = bpf_map_pop_elem, .gpl_only = false, - .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 12a93fb374492b0ff848681a0705382675fbbc40..8bbd72d3a121f4e1cb75effd9c5fe5b96728a6b5 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -122,6 +122,7 @@ static int __queue_map_get(struct bpf_map *map, void *value, bool delete) raw_spin_lock_irqsave(&qs->lock, flags); if (queue_stack_map_is_empty(qs)) { + memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } @@ -151,6 +152,7 @@ static int __stack_map_get(struct bpf_map *map, void *value, bool delete) raw_spin_lock_irqsave(&qs->lock, flags); if (queue_stack_map_is_empty(qs)) { + memset(value, 0, qs->map.value_size); err = -ENOENT; goto out; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98fa0be35370b4fd83eb9c924a9afb45747e6472..171a2c88e77ddd28ea5f9d5ba135d424cd09714e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1387,21 +1387,24 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, enum bpf_access_type t) { switch (env->prog->type) { + /* Program types only with direct read access go here! */ case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SK_REUSEPORT: - /* dst_input() and dst_output() can't write for now */ + case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SKB: if (t == BPF_WRITE) return false; /* fallthrough */ + + /* Program types with direct read + write access go here! */ case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_FLOW_DISSECTOR: if (meta) return meta->pkt_access; @@ -5706,7 +5709,11 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) bool is_narrower_load; u32 target_size; - if (ops->gen_prologue) { + if (ops->gen_prologue || env->seen_direct_write) { + if (!ops->gen_prologue) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); if (cnt >= ARRAY_SIZE(insn_buf)) { diff --git a/net/core/filter.c b/net/core/filter.c index 35c6933c262298d0951725ea255d70bbdd294353..e521c5ebc7d11cdfdcc10307ad973bcac2d1602a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5264,8 +5264,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_pull_data_proto; case BPF_FUNC_msg_push_data: return &bpf_msg_push_data_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -5296,8 +5294,6 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; @@ -5496,7 +5492,13 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): return false; + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): + if (!capable(CAP_SYS_ADMIN)) + return false; + break; } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): @@ -5638,6 +5640,15 @@ static bool sock_filter_is_valid_access(int off, int size, prog->expected_attach_type); } +static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + /* Neither direct read nor direct write requires any preliminary + * action. + */ + return 0; +} + static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { @@ -7204,6 +7215,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops xdp_prog_ops = { @@ -7302,6 +7314,7 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = { .get_func_proto = sk_msg_func_proto, .is_valid_access = sk_msg_is_valid_access, .convert_ctx_access = sk_msg_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops sk_msg_prog_ops = { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b1a2c5e38530a72478cab1a874cb7f78909a118b..37b4667128a3808395e23b0c53325a5d937c6b54 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } -# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -# endif #endif static struct ctl_table net_core_table[] = { @@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = { .extra2 = &one, }, # endif + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &one, + }, #endif { .procname = "netdev_tstamp_prequeue", diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index dd49df5e2df4f64eb82559c2754f7a6a5d5bdccc..7f90d3645af892d88373926ad7eee85a77f113b1 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -20,3 +20,5 @@ CONFIG_VXLAN=y CONFIG_GENEVE=y CONFIG_NET_CLS_FLOWER=m CONFIG_LWTUNNEL=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 769d68a48f303960781c07a99c69be15b0691e5b..36f3d3009d1a079e57032b47b57bc306d045a3a8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4891,6 +4891,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=76 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -5146,6 +5148,7 @@ static struct bpf_test tests[] = { .fixup_cgroup_storage = { 1 }, .result = REJECT, .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -5261,6 +5264,7 @@ static struct bpf_test tests[] = { .fixup_percpu_cgroup_storage = { 1 }, .result = REJECT, .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -14050,6 +14054,13 @@ static void get_unpriv_disabled() fclose(fd); } +static bool test_as_unpriv(struct bpf_test *test) +{ + return !test->prog_type || + test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER || + test->prog_type == BPF_PROG_TYPE_CGROUP_SKB; +} + static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0, skips = 0; @@ -14060,10 +14071,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) /* Program types that are not supported by non-root we * skip right away. */ - if (!test->prog_type && unpriv_disabled) { + if (test_as_unpriv(test) && unpriv_disabled) { printf("#%d/u %s SKIP\n", i, test->descr); skips++; - } else if (!test->prog_type) { + } else if (test_as_unpriv(test)) { if (!unpriv) set_admin(false); printf("#%d/u %s ", i, test->descr);