diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h index 33f9058ed32e29ab5ef27633d9e89950a9afdd6c..4268a7e0f344202a044f794a677e1b50231c0891 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h @@ -13,8 +13,8 @@ */ #define NFP_BPF_SCALAR_VALUE 1 #define NFP_BPF_MAP_VALUE 4 -#define NFP_BPF_STACK 5 -#define NFP_BPF_PACKET_DATA 7 +#define NFP_BPF_STACK 6 +#define NFP_BPF_PACKET_DATA 8 enum bpf_cap_tlv_type { NFP_BPF_CAP_TYPE_FUNC = 1, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c51f364770de970bb80a531b68c8f1ea62d60bd5..beb5a974abebd3bcfe7b3e9601049a77a398424e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,7 @@ struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; +struct bpf_func_state; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -127,6 +128,13 @@ struct bpf_map_ops { bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1); + + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@ -324,6 +332,8 @@ enum bpf_arg_type { ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -428,6 +438,7 @@ enum bpf_reg_type { PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ PTR_TO_STACK, /* reg == frame_pointer + offset */ PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ @@ -455,6 +466,7 @@ enum bpf_reg_type { */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ + PTR_TO_FUNC, /* reg points to a bpf program function */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ __BPF_REG_TYPE_MAX, @@ -1999,6 +2011,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_for_each_map_elem_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index bea28c220829a855cc1bc58ad0fdd2893572325a..ef49a22433b1466a7294401d2d759aa93544dcb9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -70,6 +70,8 @@ struct bpf_reg_state { unsigned long raw1; unsigned long raw2; } raw; + + u32 subprogno; /* for PTR_TO_FUNC */ }; /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. @@ -206,6 +208,7 @@ struct bpf_func_state { int acquired_refs; struct bpf_reference_state *refs; int allocated_stack; + bool in_callback_fn; struct bpf_stack_state *stack; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 53b4e42def740a5b48f65c44a0eea38e41b2822e..5383682f8faccf9ad6e9e7233ad916457e3c5f02 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -394,6 +394,15 @@ enum bpf_link_type { * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -3923,6 +3932,34 @@ union bpf_attr { * set cpus_ptr in task. * Return * 0 on success, or a negative error in case of failure. + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * The following are a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** return 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4102,6 +4139,7 @@ union bpf_attr { FN(cpumask_op), \ FN(cpus_share_cache), \ FN(sched_set_task_cpus_ptr), \ + FN(for_each_map_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index e8957e911de31546947d92601728f7b6d6afa26c..10319005b95bbd514a63dd8d7cb715511963c0d1 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -661,3 +661,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) */ return ret == 0 ? 0 : -EAGAIN; } + +BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn, + void *, callback_ctx, u64, flags) +{ + return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags); +} + +const struct bpf_func_proto bpf_for_each_map_elem_proto = { + .func = bpf_for_each_map_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5fccf33196b5dff4c249e4fc608128f076348342..24adcb5d999c3112be9d7f7870377c4d3588e11d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -705,6 +705,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_sched_tg_tag_of_proto; case BPF_FUNC_sched_task_tag_of: return &bpf_sched_task_tag_of_proto; + case BPF_FUNC_for_each_map_elem: + return &bpf_for_each_map_elem_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2cf056148e9f6af21e3d5fa669dd8946d789ddf8..d189862a2c3f1cc3cae068fe053bdeeeb13c0d60 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -235,6 +235,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_CALL; } +static bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -249,6 +255,7 @@ struct bpf_call_arg_meta { u32 btf_id; struct btf *ret_btf; u32 ret_btf_id; + u32 subprogno; }; struct btf *btf_vmlinux; @@ -428,6 +435,7 @@ static bool reg_type_not_null(enum bpf_reg_type type) return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || + type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON; } @@ -538,6 +546,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env, [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", [PTR_TO_MEM] = "mem", [PTR_TO_BUF] = "buf", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", }; if (type & PTR_MAYBE_NULL) { @@ -626,6 +636,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); else if (base_type(t) == CONST_PTR_TO_MAP || + base_type(t) == PTR_TO_MAP_KEY || base_type(t) == PTR_TO_MAP_VALUE) verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, @@ -1538,6 +1549,19 @@ static int check_subprogs(struct bpf_verifier_env *env) /* determine subprog starts. The end is one before the next starts */ for (i = 0; i < insn_cnt; i++) { + if (bpf_pseudo_func(insn + i)) { + if (!env->bpf_capable) { + verbose(env, + "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + return -EPERM; + } + ret = add_subprog(env, i + insn[i].imm + 1); + if (ret < 0) + return ret; + /* remember subprog */ + insn[i + 1].imm = ret; + continue; + } if (!bpf_pseudo_call(insn + i)) continue; if (!env->bpf_capable) { @@ -2260,6 +2284,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_BUF: case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: return true; default: return false; @@ -2840,6 +2866,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno, reg = &cur_regs(env)[regno]; switch (reg->type) { + case PTR_TO_MAP_KEY: + verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", + mem_size, off, size); + break; case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); @@ -3243,6 +3273,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_FLOW_KEYS: pointer_desc = "flow keys "; break; + case PTR_TO_MAP_KEY: + pointer_desc = "key "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@ -3344,7 +3377,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { - if (!bpf_pseudo_call(insn + i)) + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ ret_insn[frame] = i + 1; @@ -3806,7 +3839,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* for access checks, reg->off is just part of off */ off += reg->off; - if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_KEY) { + if (t == BPF_WRITE) { + verbose(env, "write to change key R%d not allowed\n", regno); + return -EACCES; + } + + err = check_mem_region_access(env, regno, off, size, + reg->map_ptr->key_size, false); + if (err) + return err; + if (value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@ -4239,6 +4284,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_MAP_KEY: + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, regno, reg->off, access_size, meta && meta->raw_mode ? BPF_WRITE : @@ -4456,6 +4504,7 @@ static const struct bpf_reg_types map_key_value_types = { PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@ -4487,6 +4536,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, PTR_TO_MEM | MEM_ALLOC, @@ -4499,6 +4549,7 @@ static const struct bpf_reg_types int_ptr_types = { PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@ -4511,6 +4562,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; +static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; +static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -4534,6 +4587,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -4746,6 +4801,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_FUNC) { + meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. @@ -5360,6 +5417,35 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); } +static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map; + int err; + + if (bpf_map_ptr_poisoned(insn_aux)) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + + map = BPF_MAP_PTR(insn_aux->map_ptr_state); + if (!map->ops->map_set_for_each_callback_args || + !map->ops->map_for_each_callback) { + verbose(env, "callback function not allowed for map\n"); + return -ENOTSUPP; + } + + err = map->ops->map_set_for_each_callback_args(env, caller, callee); + if (err) + return err; + + callee->in_callback_fn = true; + return 0; +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -5382,8 +5468,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) state->curframe--; caller = state->frame[state->curframe]; - /* return to the caller whatever r0 had in the callee */ - caller->regs[BPF_REG_0] = *r0; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); + + if (r0->type != SCALAR_VALUE) { + verbose(env, "R0 not a scalar value\n"); + return -EACCES; + } + if (!tnum_in(range, r0->var_off)) { + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + } /* Transfer references to the caller */ err = transfer_reference_state(caller, callee); @@ -5438,7 +5538,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem && - func_id != BPF_FUNC_map_peek_elem) + func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_for_each_map_elem) return 0; if (map == NULL) { @@ -5519,17 +5620,20 @@ static int check_reference_leak(struct bpf_verifier_env *env) return state->acquired_refs ? -EINVAL : 0; } -static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) +static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; enum bpf_return_type ret_type; enum bpf_type_flag ret_flag; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; bool changes_data; - int i, err; + int i, err, func_id; /* find function prototype */ + func_id = insn->imm; if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); @@ -5625,6 +5729,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -8364,6 +8475,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } + if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; + u32 subprogno = insn[1].imm; + + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); + return -EINVAL; + } + if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { + verbose(env, "callback function not static\n"); + return -EINVAL; + } + + dst_reg->type = PTR_TO_FUNC; + dst_reg->subprogno = subprogno; + return 0; + } + map = env->used_maps[aux->map_index]; dst_reg->map_ptr = map; @@ -8746,6 +8875,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env) struct bpf_insn *insns = env->prog->insnsi; int ret; + if (bpf_pseudo_func(insns + t)) + return visit_func_call_insn(t, insn_cnt, insns, env, true); + /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insns[t].code) != BPF_JMP && BPF_CLASS(insns[t].code) != BPF_JMP32) @@ -9367,6 +9499,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, */ return false; } + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: /* a PTR_TO_MAP_VALUE could be safe to use as a * PTR_TO_MAP_VALUE_OR_NULL into the same map. @@ -10198,10 +10331,9 @@ static int do_check(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else - err = check_helper_call(env, insn->imm, env->insn_idx); + err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; - } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || @@ -10618,6 +10750,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) goto next_insn; } + if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@ -10750,9 +10888,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i; - for (i = 0; i < insn_cnt; i++, insn++) - if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) - insn->src_reg = 0; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + if (insn->src_reg == BPF_PSEUDO_FUNC) + continue; + insn->src_reg = 0; + } } /* single env->prog->insni[off] instruction was replaced with the range @@ -11389,6 +11531,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ + continue; + } + if (!bpf_pseudo_call(insn)) continue; /* Upon error here we cannot fall back to interpreter but @@ -11492,6 +11640,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) for (i = 0; i < env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { + subprog = insn[1].imm; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; + } if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; @@ -11537,6 +11691,11 @@ static int jit_subprogs(struct bpf_verifier_env *env) * later look the same as if they were interpreted only. */ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + continue; + } if (!bpf_pseudo_call(insn)) continue; insn->off = env->insn_aux_data[i].call_imm; @@ -11609,6 +11768,14 @@ static int fixup_call_args(struct bpf_verifier_env *env) return -EINVAL; } for (i = 0; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* When JIT fails the progs with callback calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "callbacks are not allowed in non-JITed programs\n"); + return -EINVAL; + } + if (!bpf_pseudo_call(insn)) continue; depth = get_callee_stack_depth(env, insn, i); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 81a7b622c691703e8dca0d6f5cd47ce2916e1f37..fba205086cf85eaa7527beeb1a42e70e9131fe26 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1331,6 +1331,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_for_each_map_elem: + return &bpf_for_each_map_elem_proto; default: return NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 14c44fdf3f702d0e33123d912d7294170193e790..bb9e72b75d63e0893c9fb8e0ad541ebb4e1a636c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -394,6 +394,15 @@ enum bpf_link_type { * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -3923,6 +3932,34 @@ union bpf_attr { * set cpus_ptr in task. * Return * 0 on success, or a negative error in case of failure. + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * The following are a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** return 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4102,6 +4139,7 @@ union bpf_attr { FN(cpumask_op), \ FN(cpus_share_cache), \ FN(sched_set_task_cpus_ptr), \ + FN(for_each_map_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper