diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 73a3516f1a486a4f3abb91d99ea938a986f06c10..d3b75aa0c54d1ea346c1e04f50f2cfb982b310de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,6 +504,15 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_dynptr_data; } +static bool is_callback_calling_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_for_each_map_elem || + func_id == BPF_FUNC_timer_set_callback || + func_id == BPF_FUNC_find_vma || + func_id == BPF_FUNC_loop || + func_id == BPF_FUNC_user_ringbuf_drain; +} + static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, const struct bpf_map *map) { @@ -1677,7 +1686,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, reg->type = SCALAR_VALUE; reg->var_off = tnum_unknown; reg->frameno = 0; - reg->precise = env->subprog_cnt > 1 || !env->bpf_capable; + reg->precise = !env->bpf_capable; __mark_reg_unbounded(reg); } @@ -2646,6 +2655,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (opcode == BPF_CALL) { if (insn->src_reg == BPF_PSEUDO_CALL) return -ENOTSUPP; + /* BPF helpers that invoke callback subprogs are + * equivalent to BPF_PSEUDO_CALL above + */ + if (insn->src_reg == 0 && is_callback_calling_function(insn->imm)) + return -ENOTSUPP; /* regular helper call sets R0 */ *reg_mask &= ~1; if (*reg_mask & 0x3f) { @@ -2735,8 +2749,11 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, /* big hammer: mark all scalars precise in this path. * pop_stack may still get !precise scalars. + * We also skip current state and go straight to first parent state, + * because precision markings in current non-checkpointed state are + * not needed. See why in the comment in __mark_chain_precision below. */ - for (; st; st = st->parent) + for (st = st->parent; st; st = st->parent) { for (i = 0; i <= st->curframe; i++) { func = st->frame[i]; for (j = 0; j < BPF_REG_FP; j++) { @@ -2754,9 +2771,122 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, reg->precise = true; } } + } +} + +static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + struct bpf_func_state *func; + struct bpf_reg_state *reg; + int i, j; + + for (i = 0; i <= st->curframe; i++) { + func = st->frame[i]; + for (j = 0; j < BPF_REG_FP; j++) { + reg = &func->regs[j]; + if (reg->type != SCALAR_VALUE) + continue; + reg->precise = false; + } + for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { + if (!is_spilled_reg(&func->stack[j])) + continue; + reg = &func->stack[j].spilled_ptr; + if (reg->type != SCALAR_VALUE) + continue; + reg->precise = false; + } + } } -static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, +/* + * __mark_chain_precision() backtracks BPF program instruction sequence and + * chain of verifier states making sure that register *regno* (if regno >= 0) + * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked + * SCALARS, as well as any other registers and slots that contribute to + * a tracked state of given registers/stack slots, depending on specific BPF + * assembly instructions (see backtrack_insns() for exact instruction handling + * logic). This backtracking relies on recorded jmp_history and is able to + * traverse entire chain of parent states. This process ends only when all the + * necessary registers/slots and their transitive dependencies are marked as + * precise. + * + * One important and subtle aspect is that precise marks *do not matter* in + * the currently verified state (current state). It is important to understand + * why this is the case. + * + * First, note that current state is the state that is not yet "checkpointed", + * i.e., it is not yet put into env->explored_states, and it has no children + * states as well. It's ephemeral, and can end up either a) being discarded if + * compatible explored state is found at some point or BPF_EXIT instruction is + * reached or b) checkpointed and put into env->explored_states, branching out + * into one or more children states. + * + * In the former case, precise markings in current state are completely + * ignored by state comparison code (see regsafe() for details). Only + * checkpointed ("old") state precise markings are important, and if old + * state's register/slot is precise, regsafe() assumes current state's + * register/slot as precise and checks value ranges exactly and precisely. If + * states turn out to be compatible, current state's necessary precise + * markings and any required parent states' precise markings are enforced + * after the fact with propagate_precision() logic, after the fact. But it's + * important to realize that in this case, even after marking current state + * registers/slots as precise, we immediately discard current state. So what + * actually matters is any of the precise markings propagated into current + * state's parent states, which are always checkpointed (due to b) case above). + * As such, for scenario a) it doesn't matter if current state has precise + * markings set or not. + * + * Now, for the scenario b), checkpointing and forking into child(ren) + * state(s). Note that before current state gets to checkpointing step, any + * processed instruction always assumes precise SCALAR register/slot + * knowledge: if precise value or range is useful to prune jump branch, BPF + * verifier takes this opportunity enthusiastically. Similarly, when + * register's value is used to calculate offset or memory address, exact + * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to + * what we mentioned above about state comparison ignoring precise markings + * during state comparison, BPF verifier ignores and also assumes precise + * markings *at will* during instruction verification process. But as verifier + * assumes precision, it also propagates any precision dependencies across + * parent states, which are not yet finalized, so can be further restricted + * based on new knowledge gained from restrictions enforced by their children + * states. This is so that once those parent states are finalized, i.e., when + * they have no more active children state, state comparison logic in + * is_state_visited() would enforce strict and precise SCALAR ranges, if + * required for correctness. + * + * To build a bit more intuition, note also that once a state is checkpointed, + * the path we took to get to that state is not important. This is crucial + * property for state pruning. When state is checkpointed and finalized at + * some instruction index, it can be correctly and safely used to "short + * circuit" any *compatible* state that reaches exactly the same instruction + * index. I.e., if we jumped to that instruction from a completely different + * code path than original finalized state was derived from, it doesn't + * matter, current state can be discarded because from that instruction + * forward having a compatible state will ensure we will safely reach the + * exit. States describe preconditions for further exploration, but completely + * forget the history of how we got here. + * + * This also means that even if we needed precise SCALAR range to get to + * finalized state, but from that point forward *that same* SCALAR register is + * never used in a precise context (i.e., it's precise value is not needed for + * correctness), it's correct and safe to mark such register as "imprecise" + * (i.e., precise marking set to false). This is what we rely on when we do + * not set precise marking in current state. If no child state requires + * precision for any given SCALAR register, it's safe to dictate that it can + * be imprecise. If any child state does require this register to be precise, + * we'll mark it precise later retroactively during precise markings + * propagation from child state to parent states. + * + * Skipping precise marking setting in current state is a mild version of + * relying on the above observation. But we can utilize this property even + * more aggressively by proactively forgetting any precise marking in the + * current state (which we inherited from the parent state), right before we + * checkpoint it and branch off into new child state. This is done by + * mark_all_scalars_imprecise() to hopefully get more permissive and generic + * finalized states which help in short circuiting more future states. + */ +static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno, int spi) { struct bpf_verifier_state *st = env->cur_state; @@ -2773,18 +2903,18 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, if (!env->bpf_capable) return 0; - func = st->frame[st->curframe]; + /* Do sanity checks against current state of register and/or stack + * slot, but don't set precise flag in current state, as precision + * tracking in the current state is unnecessary. + */ + func = st->frame[frame]; if (regno >= 0) { reg = &func->regs[regno]; if (reg->type != SCALAR_VALUE) { WARN_ONCE(1, "backtracing misuse"); return -EFAULT; } - if (!reg->precise) - new_marks = true; - else - reg_mask = 0; - reg->precise = true; + new_marks = true; } while (spi >= 0) { @@ -2797,11 +2927,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, stack_mask = 0; break; } - if (!reg->precise) - new_marks = true; - else - stack_mask = 0; - reg->precise = true; + new_marks = true; break; } @@ -2809,12 +2935,42 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, return 0; if (!reg_mask && !stack_mask) return 0; + for (;;) { DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); + + if (last_idx < 0) { + /* we are at the entry into subprog, which + * is expected for global funcs, but only if + * requested precise registers are R1-R5 + * (which are global func's input arguments) + */ + if (st->curframe == 0 && + st->frame[0]->subprogno > 0 && + st->frame[0]->callsite == BPF_MAIN_FUNC && + stack_mask == 0 && (reg_mask & ~0x3e) == 0) { + bitmap_from_u64(mask, reg_mask); + for_each_set_bit(i, mask, 32) { + reg = &st->frame[0]->regs[i]; + if (reg->type != SCALAR_VALUE) { + reg_mask &= ~(1u << i); + continue; + } + reg->precise = true; + } + return 0; + } + + verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n", + st->frame[0]->subprogno, reg_mask, stack_mask); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + for (i = last_idx;;) { if (skip_first) { err = 0; @@ -2854,7 +3010,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, break; new_marks = false; - func = st->frame[st->curframe]; + func = st->frame[frame]; bitmap_from_u64(mask, reg_mask); for_each_set_bit(i, mask, 32) { reg = &func->regs[i]; @@ -2920,12 +3076,17 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, int mark_chain_precision(struct bpf_verifier_env *env, int regno) { - return __mark_chain_precision(env, regno, -1); + return __mark_chain_precision(env, env->cur_state->curframe, regno, -1); +} + +static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno) +{ + return __mark_chain_precision(env, frame, regno, -1); } -static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) +static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi) { - return __mark_chain_precision(env, -1, spi); + return __mark_chain_precision(env, frame, -1, spi); } static bool is_spillable_regtype(enum bpf_reg_type type) @@ -6597,6 +6758,10 @@ typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, struct bpf_func_state *callee, int insn_idx); +static int set_callee_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, int insn_idx); + static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx, int subprog, set_callee_state_fn set_callee_state_cb) @@ -6647,6 +6812,16 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } + /* set_callee_state is used for direct subprog calls, but we are + * interested in validating only BPF helpers that can call subprogs as + * callbacks + */ + if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) { + verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } + if (insn->code == (BPF_JMP | BPF_CALL) && insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback) { @@ -9153,6 +9328,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, return err; return adjust_ptr_min_max_vals(env, insn, dst_reg, src_reg); + } else if (dst_reg->precise) { + /* if dst_reg is precise, src_reg should be precise as well */ + err = mark_chain_precision(env, insn->src_reg); + if (err) + return err; } } else { /* Pretend the src is a reg with a known value, since we only @@ -11466,7 +11646,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, if (env->explore_alu_limits) return false; if (rcur->type == SCALAR_VALUE) { - if (!rold->precise && !rcur->precise) + if (!rold->precise) return true; /* new val must satisfy old val knowledge */ return range_within(rold, rcur) && @@ -11789,34 +11969,36 @@ static int propagate_precision(struct bpf_verifier_env *env, { struct bpf_reg_state *state_reg; struct bpf_func_state *state; - int i, err = 0; + int i, err = 0, fr; - state = old->frame[old->curframe]; - state_reg = state->regs; - for (i = 0; i < BPF_REG_FP; i++, state_reg++) { - if (state_reg->type != SCALAR_VALUE || - !state_reg->precise) - continue; - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "propagating r%d\n", i); - err = mark_chain_precision(env, i); - if (err < 0) - return err; - } + for (fr = old->curframe; fr >= 0; fr--) { + state = old->frame[fr]; + state_reg = state->regs; + for (i = 0; i < BPF_REG_FP; i++, state_reg++) { + if (state_reg->type != SCALAR_VALUE || + !state_reg->precise) + continue; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "frame %d: propagating r%d\n", i, fr); + err = mark_chain_precision_frame(env, fr, i); + if (err < 0) + return err; + } - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (!is_spilled_reg(&state->stack[i])) - continue; - state_reg = &state->stack[i].spilled_ptr; - if (state_reg->type != SCALAR_VALUE || - !state_reg->precise) - continue; - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "propagating fp%d\n", - (-i - 1) * BPF_REG_SIZE); - err = mark_chain_precision_stack(env, i); - if (err < 0) - return err; + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (!is_spilled_reg(&state->stack[i])) + continue; + state_reg = &state->stack[i].spilled_ptr; + if (state_reg->type != SCALAR_VALUE || + !state_reg->precise) + continue; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "frame %d: propagating fp%d\n", + (-i - 1) * BPF_REG_SIZE, fr); + err = mark_chain_precision_stack_frame(env, fr, i); + if (err < 0) + return err; + } } return 0; } @@ -12011,6 +12193,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) env->prev_jmps_processed = env->jmps_processed; env->prev_insn_processed = env->insn_processed; + /* forget precise markings we inherited, see __mark_chain_precision */ + if (env->bpf_capable) + mark_all_scalars_imprecise(env, cur); + /* add new state to the head of linked list */ new = &new_sl->state; err = copy_verifier_state(new, cur); @@ -14559,6 +14745,8 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, subprog); + state->first_insn_idx = env->subprog_info[subprog].start; + state->last_insn_idx = -1; regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 970f09156eb4650f66cf3e7b6a8c5d0923df1ccc..4666f88f2bb4f5f7590efc214925dd104be6bbaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -2,7 +2,7 @@ #include #define MAX_INSNS 512 -#define MAX_MATCHES 16 +#define MAX_MATCHES 24 struct bpf_reg_match { unsigned int line; @@ -267,6 +267,7 @@ static struct bpf_align_test tests[] = { */ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), @@ -280,6 +281,7 @@ static struct bpf_align_test tests[] = { BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), @@ -311,44 +313,52 @@ static struct bpf_align_test tests[] = { {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Variable offset is added to R5 packet pointer, - * resulting in auxiliary alignment of 4. + * resulting in auxiliary alignment of 4. To avoid BPF + * verifier's precision backtracking logging + * interfering we also have a no-op R4 = R5 + * instruction to validate R5 state. We also check + * that R4 is what it should be in such case. */ - {17, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {18, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {23, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {23, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {25, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w=pkt(off=14,r=8"}, /* Variable offset is added to R5, resulting in a - * variable offset of (4n). + * variable offset of (4n). See comment for insn #18 + * for R4 = R5 trick. */ - {26, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {27, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {28, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {33, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {33, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, }, }, { @@ -681,6 +691,6 @@ void test_align(void) if (!test__start_subtest(test->descr)) continue; - CHECK_FAIL(do_test_single(test)); + ASSERT_OK(do_test_single(test), test->descr); } }