/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#ifndef _LINUX_BPF_VERIFIER_H
#define _LINUX_BPF_VERIFIER_H 1

#include <linux/bpf.h> /* for enum bpf_reg_type */
8
#include <linux/btf.h> /* for struct btf and btf_id() */
9
#include <linux/filter.h> /* for MAX_BPF_STACK */
10
#include <linux/tnum.h>
11

12 13 14 15
/* Maximum variable offset umax_value permitted when resolving memory accesses.
 * In practice this is far bigger than any realistic pointer offset; this limit
 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
 */
#define BPF_MAX_VAR_OFF	(1 << 29)
/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
 * that converting umax_value to int cannot overflow.
 */
#define BPF_MAX_VAR_SIZ	(1 << 29)
21

22 23 24 25 26 27 28 29 30 31 32 33 34
/* Liveness marks, used for registers and spilled-regs (in stack slots).
 * Read marks propagate upwards until they find a write mark; they record that
 * "one of this state's descendants read this reg" (and therefore the reg is
 * relevant for states_equal() checks).
 * Write marks collect downwards and do not propagate; they record that "the
 * straight-line code that reached this state (from its parent) wrote this reg"
 * (and therefore that reads propagated from this state or its descendants
 * should not propagate to its parent).
 * A state with a write mark can receive read marks; it just won't propagate
 * them to its parent, since the write mark is a property, not of the state,
 * but of the link between it and its parent.  See mark_reg_read() and
 * mark_stack_slot_read() in kernel/bpf/verifier.c.
 */
35 36
/* Bit flags describing the liveness of a register or spilled stack slot.
 * The READ32/READ64 bits may be combined (REG_LIVE_READ is their union).
 */
enum bpf_reg_liveness {
	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
};

44
struct bpf_reg_state {
45
	/* Ordering of fields matters.  See states_equal() */
46
	enum bpf_reg_type type;
47 48
	/* Fixed part of pointer offset, pointer types only */
	s32 off;
49
	union {
50
		/* valid when type == PTR_TO_PACKET */
51
		int range;
52 53 54 55 56

		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
		 *   PTR_TO_MAP_VALUE_OR_NULL
		 */
		struct bpf_map *map_ptr;
57

58 59 60 61 62
		/* for PTR_TO_BTF_ID */
		struct {
			struct btf *btf;
			u32 btf_id;
		};
63

64 65
		u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */

66
		/* Max size from any of the above. */
67 68 69 70
		struct {
			unsigned long raw1;
			unsigned long raw2;
		} raw;
71 72

		u32 subprogno; /* for PTR_TO_FUNC */
73
	};
74 75 76 77
	/* For PTR_TO_PACKET, used to find other pointers with the same variable
	 * offset, so they can share range knowledge.
	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
	 * came from, when one is tested for != NULL.
78 79
	 * For PTR_TO_MEM_OR_NULL this is used to identify memory allocation
	 * for the purpose of tracking that it's freed.
80 81
	 * For PTR_TO_SOCKET this is used to share which pointers retain the
	 * same reference to the socket, to determine proper reference freeing.
82
	 */
A
Alexei Starovoitov 已提交
83
	u32 id;
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
	/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
	 * from a pointer-cast helper, bpf_sk_fullsock() and
	 * bpf_tcp_sock().
	 *
	 * Consider the following where "sk" is a reference counted
	 * pointer returned from "sk = bpf_sk_lookup_tcp();":
	 *
	 * 1: sk = bpf_sk_lookup_tcp();
	 * 2: if (!sk) { return 0; }
	 * 3: fullsock = bpf_sk_fullsock(sk);
	 * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
	 * 5: tp = bpf_tcp_sock(fullsock);
	 * 6: if (!tp) { bpf_sk_release(sk); return 0; }
	 * 7: bpf_sk_release(sk);
	 * 8: snd_cwnd = tp->snd_cwnd;  // verifier will complain
	 *
	 * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
	 * "tp" ptr should be invalidated also.  In order to do that,
	 * the reg holding "fullsock" and "sk" need to remember
	 * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
	 * such that the verifier can reset all regs which have
	 * ref_obj_id matching the sk_reg->id.
	 *
	 * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
	 * sk_reg->id will stay as NULL-marking purpose only.
	 * After NULL-marking is done, sk_reg->id can be reset to 0.
	 *
	 * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
	 * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
	 *
	 * After "tp = bpf_tcp_sock(fullsock);" at line 5,
	 * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
	 * which is the same as sk_reg->ref_obj_id.
	 *
	 * From the verifier perspective, if sk, fullsock and tp
	 * are not NULL, they are the same ptr with different
	 * reg->type.  In particular, bpf_sk_release(tp) is also
	 * allowed and has the same effect as bpf_sk_release(sk).
	 */
	u32 ref_obj_id;
124 125 126 127 128 129 130
	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
	 * the actual value.
	 * For pointer types, this represents the variable part of the offset
	 * from the pointed-to object, and is shared with all bpf_reg_states
	 * with the same id as us.
	 */
	struct tnum var_off;
A
Alexei Starovoitov 已提交
131
	/* Used to determine if any memory access using this register will
132 133 134
	 * result in a bad access.
	 * These refer to the same value as var_off, not necessarily the actual
	 * contents of the register.
A
Alexei Starovoitov 已提交
135
	 */
136 137 138 139
	s64 smin_value; /* minimum possible (s64)value */
	s64 smax_value; /* maximum possible (s64)value */
	u64 umin_value; /* minimum possible (u64)value */
	u64 umax_value; /* maximum possible (u64)value */
140 141 142 143
	s32 s32_min_value; /* minimum possible (s32)value */
	s32 s32_max_value; /* maximum possible (s32)value */
	u32 u32_min_value; /* minimum possible (u32)value */
	u32 u32_max_value; /* maximum possible (u32)value */
144 145
	/* parentage chain for liveness checking */
	struct bpf_reg_state *parent;
146 147 148 149 150 151 152
	/* Inside the callee two registers can be both PTR_TO_STACK like
	 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
	 * while another to the caller's stack. To differentiate them 'frameno'
	 * is used which is an index in bpf_verifier_state->frame[] array
	 * pointing to bpf_func_state.
	 */
	u32 frameno;
153 154 155 156 157
	/* Tracks subreg definition. The stored value is the insn_idx of the
	 * writing insn. This is safe because subreg_def is used before any insn
	 * patching which only happens after main verification finished.
	 */
	s32 subreg_def;
158
	enum bpf_reg_liveness live;
159 160
	/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
	bool precise;
161 162 163 164 165
};

/* Classification of what a stack slot byte holds; stored in
 * bpf_stack_state::slot_type[].
 */
enum bpf_stack_slot_type {
	STACK_INVALID,    /* nothing was stored in this stack slot */
	STACK_SPILL,      /* register spilled into stack */
	STACK_MISC,	  /* BPF program wrote some data into this slot */
	STACK_ZERO,	  /* BPF program wrote constant zero */
};

#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */

172 173 174 175 176
/* State of one register-sized stack slot. */
struct bpf_stack_state {
	/* register state saved in this slot; bpf_get_spilled_reg() hands it
	 * out only when slot_type[0] == STACK_SPILL
	 */
	struct bpf_reg_state spilled_ptr;
	/* one type tag per byte of the slot (BPF_REG_SIZE entries);
	 * compared against enum bpf_stack_slot_type values
	 */
	u8 slot_type[BPF_REG_SIZE];
};

177 178 179 180 181 182 183 184 185 186 187
/* Record of one acquired reference; bpf_func_state::refs points at these. */
struct bpf_reference_state {
	/* Track each reference created with a unique id, even if the same
	 * instruction creates the reference multiple times (eg, via CALL).
	 */
	int id;
	/* Instruction where the allocation of this reference occurred. This
	 * is used purely to inform the user of a reference leak.
	 */
	int insn_idx;
};

188 189 190
/* state of the program:
 * type of all registers and stack info
 */
191
struct bpf_func_state {
192
	struct bpf_reg_state regs[MAX_BPF_REG];
193 194 195 196 197 198 199
	/* index of call instruction that called into this func */
	int callsite;
	/* stack frame number of this function state from pov of
	 * enclosing bpf_verifier_state.
	 * 0 = main function, 1 = first callee.
	 */
	u32 frameno;
200
	/* subprog number == index within subprog_info
201 202 203 204
	 * zero == main subprog
	 */
	u32 subprogno;

205 206 207
	/* The following fields should be last. See copy_func_state() */
	int acquired_refs;
	struct bpf_reference_state *refs;
208
	int allocated_stack;
209
	bool in_callback_fn;
210
	struct bpf_stack_state *stack;
211 212
};

213 214 215 216 217
/* Pair of instruction indices; element type of
 * bpf_verifier_state::jmp_history.
 */
struct bpf_idx_pair {
	u32 prev_idx;	/* presumably the insn idx jumped from - TODO confirm */
	u32 idx;	/* presumably the insn idx jumped to - TODO confirm */
};

218 219 220 221
#define MAX_CALL_FRAMES 8
struct bpf_verifier_state {
	/* call stack tracking */
	struct bpf_func_state *frame[MAX_CALL_FRAMES];
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
	struct bpf_verifier_state *parent;
	/*
	 * 'branches' field is the number of branches left to explore:
	 * 0 - all possible paths from this state reached bpf_exit or
	 * were safely pruned
	 * 1 - at least one path is being explored.
	 * This state hasn't reached bpf_exit
	 * 2 - at least two paths are being explored.
	 * This state is an immediate parent of two children.
	 * One is fallthrough branch with branches==1 and another
	 * state is pushed into stack (to be explored later) also with
	 * branches==1. The parent of this state has branches==1.
	 * The verifier state tree connected via 'parent' pointer looks like:
	 * 1
	 * 1
	 * 2 -> 1 (first 'if' pushed into stack)
	 * 1
	 * 2 -> 1 (second 'if' pushed into stack)
	 * 1
	 * 1
	 * 1 bpf_exit.
	 *
	 * Once do_check() reaches bpf_exit, it calls update_branch_counts()
	 * and the verifier state tree will look:
	 * 1
	 * 1
	 * 2 -> 1 (first 'if' pushed into stack)
	 * 1
	 * 1 -> 1 (second 'if' pushed into stack)
	 * 0
	 * 0
	 * 0 bpf_exit.
	 * After pop_stack() the do_check() will resume at second 'if'.
	 *
	 * If is_state_visited() sees a state with branches > 0 it means
	 * there is a loop. If such state is exactly equal to the current state
	 * it's an infinite loop. Note states_equal() checks for states
	 * equvalency, so two states being 'states_equal' does not mean
	 * infinite loop. The exact comparison is provided by
	 * states_maybe_looping() function. It's a stronger pre-check and
	 * much faster than states_equal().
	 *
	 * This algorithm may not find all possible infinite loops or
	 * loop iteration count may be too high.
	 * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
	 */
	u32 branches;
269
	u32 insn_idx;
270
	u32 curframe;
271
	u32 active_spin_lock;
272
	bool speculative;
273 274 275 276 277 278 279 280 281 282 283

	/* first and last insn idx of this verifier state */
	u32 first_insn_idx;
	u32 last_insn_idx;
	/* jmp history recorded from first to last.
	 * backtracking is using it to go from last to first.
	 * For most states jmp_history_cnt is [0-3].
	 * For loops can go up to ~40.
	 */
	struct bpf_idx_pair *jmp_history;
	u32 jmp_history_cnt;
284 285
};

286 287 288 289 290 291 292 293 294 295 296
/* Yield a pointer to the register spilled in stack slot 'slot' of 'frame',
 * or NULL when 'slot' is beyond the allocated stack or the slot's first type
 * byte is not STACK_SPILL.
 * Arguments are parenthesized so that expression arguments (e.g. '&state' or
 * 'i + 1') expand correctly; note they are still evaluated more than once.
 */
#define bpf_get_spilled_reg(slot, frame)				\
	((((slot) < (frame)->allocated_stack / BPF_REG_SIZE) &&	\
	  ((frame)->stack[slot].slot_type[0] == STACK_SPILL))		\
	 ? &(frame)->stack[slot].spilled_ptr : NULL)

/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
#define bpf_for_each_spilled_reg(iter, frame, reg)			\
	for ((iter) = 0, (reg) = bpf_get_spilled_reg(iter, frame);	\
	     (iter) < (frame)->allocated_stack / BPF_REG_SIZE;		\
	     (iter)++, (reg) = bpf_get_spilled_reg(iter, frame))

297 298 299 300
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
	struct bpf_verifier_state state;
	struct bpf_verifier_state_list *next;
	int miss_cnt, hit_cnt;
};

304
/* Possible states for alu_state member. */
#define BPF_ALU_SANITIZE_SRC		(1U << 0)
#define BPF_ALU_SANITIZE_DST		(1U << 1)
#define BPF_ALU_NEG_VALUE		(1U << 2)
#define BPF_ALU_NON_POINTER		(1U << 3)
#define BPF_ALU_IMMEDIATE		(1U << 4)
#define BPF_ALU_SANITIZE		(BPF_ALU_SANITIZE_SRC | \
					 BPF_ALU_SANITIZE_DST)

313
struct bpf_insn_aux_data {
314 315
	union {
		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
316
		unsigned long map_ptr_state;	/* pointer/poison value for maps */
317
		s32 call_imm;			/* saved imm field of call insn */
318
		u32 alu_limit;			/* limit for add/sub register with pointer */
319 320 321 322
		struct {
			u32 map_index;		/* index into used_maps[] */
			u32 map_off;		/* offset from value base address */
		};
H
Hao Luo 已提交
323 324 325
		struct {
			enum bpf_reg_type reg_type;	/* type of pseudo_btf_id */
			union {
326 327 328 329
				struct {
					struct btf *btf;
					u32 btf_id;	/* btf_id for struct typed var */
				};
H
Hao Luo 已提交
330 331 332
				u32 mem_size;	/* mem_size for non-struct typed var */
			};
		} btf_var;
333
	};
334
	u64 map_key_state; /* constant (32 bit) key tracking for maps */
335
	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
336
	int sanitize_stack_off; /* stack slot to be cleared */
337
	u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
338
	bool zext_dst; /* this insn zero extends dst reg */
339
	u8 alu_state; /* used in combination with alu_limit */
340 341

	/* below fields are initialized once */
342
	unsigned int orig_idx; /* original instruction index */
343
	bool prune_point;
344 345 346
};

#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
#define MAX_USED_BTFS 64 /* max number of BTFs accessed by one BPF program */

/* size in bytes of bpf_verifier_log::kbuf */
#define BPF_VERIFIER_TMP_LOG_SIZE	1024

M
Martin KaFai Lau 已提交
351
struct bpf_verifier_log {
352
	u32 level;
353
	char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
354 355 356 357 358
	char __user *ubuf;
	u32 len_used;
	u32 len_total;
};

M
Martin KaFai Lau 已提交
359
static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
360 361 362 363
{
	return log->len_used >= log->len_total - 1;
}

364 365 366 367 368
/* Values for bpf_verifier_log::level */
#define BPF_LOG_LEVEL1	1
#define BPF_LOG_LEVEL2	2
#define BPF_LOG_STATS	4
#define BPF_LOG_LEVEL	(BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
#define BPF_LOG_MASK	(BPF_LOG_LEVEL | BPF_LOG_STATS)
#define BPF_LOG_KERNEL	(BPF_LOG_MASK + 1) /* kernel internal flag */
370

371 372
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
373 374 375
	return log &&
		((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
		 log->level == BPF_LOG_KERNEL);
376 377
}

378 379
#define BPF_MAX_SUBPROGS 256

380
struct bpf_subprog_info {
381
	/* 'start' has to be the first field otherwise find_subprog() won't work */
382
	u32 start; /* insn idx of function entry point */
M
Martin KaFai Lau 已提交
383
	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
384
	u16 stack_depth; /* max. stack depth used by this function */
385
	bool has_tail_call;
386
	bool tail_call_reachable;
387
	bool has_ld_abs;
388 389
};

390 391 392 393
/* single container for all structs
 * one verifier_env per bpf_check() call
 */
struct bpf_verifier_env {
394 395
	u32 insn_idx;
	u32 prev_insn_idx;
396
	struct bpf_prog *prog;		/* eBPF program being verified */
397
	const struct bpf_verifier_ops *ops;
398 399
	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
	int stack_size;			/* number of states to be processed */
400
	bool strict_alignment;		/* perform strict pointer alignment checks */
401
	bool test_state_freq;		/* test verifier with different pruning frequency */
402
	struct bpf_verifier_state *cur_state; /* current verifier state */
403
	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
404
	struct bpf_verifier_state_list *free_list;
405
	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
406
	struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */
407
	u32 used_map_cnt;		/* number of used maps */
408
	u32 used_btf_cnt;		/* number of used BTF objects */
409 410
	u32 id_gen;			/* used to generate unique reg IDs */
	bool allow_ptr_leaks;
411
	bool allow_uninit_stack;
412
	bool allow_ptr_to_map_access;
A
Alexei Starovoitov 已提交
413 414 415
	bool bpf_capable;
	bool bypass_spec_v1;
	bool bypass_spec_v4;
416 417
	bool seen_direct_write;
	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
418
	const struct bpf_line_info *prev_linfo;
M
Martin KaFai Lau 已提交
419
	struct bpf_verifier_log log;
420
	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
421 422 423 424 425
	struct {
		int *insn_state;
		int *insn_stack;
		int cur_stack;
	} cfg;
426
	u32 pass_cnt; /* number of times do_check() was called */
427
	u32 subprog_cnt;
428
	/* number of instructions analyzed by the verifier */
429 430 431
	u32 prev_insn_processed, insn_processed;
	/* number of jmps, calls, exits analyzed so far */
	u32 prev_jmps_processed, jmps_processed;
432 433 434 435 436 437 438 439 440 441 442 443 444
	/* total verification time */
	u64 verification_time;
	/* maximum number of verifier states kept in 'branching' instructions */
	u32 max_states_per_insn;
	/* total number of allocated verifier states */
	u32 total_states;
	/* some states are freed during program analysis.
	 * this is peak number of states. this number dominates kernel
	 * memory consumption during verification
	 */
	u32 peak_states;
	/* longest register parentage chain walked for liveness marking */
	u32 longest_mark_read_walk;
445 446
};

447 448
/* printf-style logging entry points; the format string is argument 2 in each
 * (checked by the compiler via __printf).
 */
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
				      const char *fmt, va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...);
__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
			    const char *fmt, ...);
453

454
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
455
{
456 457
	struct bpf_verifier_state *cur = env->cur_state;

458 459 460 461 462 463
	return cur->frame[cur->curframe];
}

/* Return the register array of the current frame (see cur_func()). */
static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
{
	return cur_func(env)->regs;
}

466
/* Offload hooks; declared here and defined elsewhere. */
int bpf_prog_offload_verifier_prep(struct bpf_prog *prog);
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
				 int insn_idx, int prev_insn_idx);
int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
void
bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
			      struct bpf_insn *insn);
void
bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
475

476 477
int check_ctx_reg(struct bpf_verifier_env *env,
		  const struct bpf_reg_state *reg, int regno);
478 479
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
		   u32 regno, u32 mem_size);
480

481 482
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog
 *
 * Build a 64-bit trampoline key: the upper 32 bits hold the target program's
 * id (when tgt_prog is non-NULL) or the BTF object id otherwise; the lower
 * 32 bits hold btf_id. In the BTF-object case bit 31 of the lower word is set
 * as well, which bpf_trampoline_unpack_key() masks off again.
 */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
					     struct btf *btf, u32 btf_id)
{
	if (tgt_prog)
		return ((u64)tgt_prog->aux->id << 32) | btf_id;
	else
		return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id;
}

491 492 493 494 495 496 497 498 499
/* unpack the IDs from the key as constructed above */
/* Split 'key' into its two halves: the upper 32 bits go to *obj_id and the
 * lower 31 bits (bit 31 masked off) go to *btf_id. Either output pointer may
 * be NULL, in which case that half is not stored.
 */
static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
{
	const u32 upper = key >> 32;
	const u32 lower = key & 0x7FFFFFFF;

	if (obj_id)
		*obj_id = upper;
	if (btf_id)
		*btf_id = lower;
}

500 501 502 503 504 505
/* Declared here and defined elsewhere (presumably kernel/bpf/verifier.c -
 * TODO confirm). Takes the log to report into, the program being attached,
 * an optional target program, a BTF id and an output tgt_info structure.
 */
int bpf_check_attach_target(struct bpf_verifier_log *log,
			    const struct bpf_prog *prog,
			    const struct bpf_prog *tgt_prog,
			    u32 btf_id,
			    struct bpf_attach_target_info *tgt_info);

506
#endif /* _LINUX_BPF_VERIFIER_H */