// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by the check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'.
 * The function expects the 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either a pointer to the map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
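
/* Illustrative sketch (not part of the verifier itself): the reference
 * tracking described above is what rejects leaky programs.  A fragment like
 * the following, assuming R1-R5 were already set up as valid
 * bpf_sk_lookup_tcp() arguments:
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 *
 * acquires a PTR_TO_SOCKET_OR_NULL in R0 but never passes it to
 * bpf_sk_release(), so the verifier rejects it with an "Unreleased
 * reference" style error instead of loading it.
 */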

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
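
/* Illustrative sketch of the encoding used by the helpers above (assuming a
 * freshly zeroed bpf_insn_aux_data):
 *
 *	bpf_map_ptr_store(aux, map, false);
 *	BPF_MAP_PTR(aux->map_ptr_state);	// == map, unpriv bit clear
 *	bpf_map_key_store(aux, 42);
 *	bpf_map_key_immediate(aux);		// == 42, BPF_MAP_KEY_SEEN now set
 *
 * i.e. the low bit of map_ptr_state carries BPF_MAP_PTR_UNPRIV, and the top
 * two bits of map_key_state carry the SEEN/POISON flags, leaving the rest
 * for the tracked constant key.
 */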

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int func_id;
	u32 btf_id;
	u32 ret_btf_id;
};

struct btf *btf_vmlinux;

static DEFINE_MUTEX(bpf_verifier_lock);

static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
	const struct bpf_line_info *linfo;
	const struct bpf_prog *prog;
	u32 i, nr_linfo;

	prog = env->prog;
	nr_linfo = prog->aux->nr_linfo;

	if (!nr_linfo || insn_off >= prog->len)
		return NULL;

	linfo = prog->aux->linfo;
	for (i = 1; i < nr_linfo; i++)
		if (insn_off < linfo[i].insn_off)
			break;

	return &linfo[i - 1];
}

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (log->level == BPF_LOG_KERNEL) {
		pr_err("BPF:%s\n", log->kbuf);
		return;
	}
	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
{
	char zero = 0;

	if (!bpf_verifier_log_needed(log))
		return;

	log->len_used = new_pos;
	if (put_user(zero, log->ubuf + new_pos))
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
			    const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(log, fmt, args);
	va_end(args);
}

static const char *ltrim(const char *s)
{
	while (isspace(*s))
		s++;

	return s;
}

__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
					 u32 insn_off,
					 const char *prefix_fmt, ...)
{
	const struct bpf_line_info *linfo;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	linfo = find_linfo(env, insn_off);
	if (!linfo || linfo == env->prev_linfo)
		return;

	if (prefix_fmt) {
		va_list args;

		va_start(args, prefix_fmt);
		bpf_verifier_vlog(&env->log, prefix_fmt, args);
		va_end(args);
	}

	verbose(env, "%s\n",
		ltrim(btf_name_by_offset(env->prog->aux->btf,
					 linfo->line_off)));

	env->prev_linfo = linfo;
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool type_is_sk_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCK_COMMON ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_XDP_SOCK;
}

static bool reg_type_not_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_MAP_VALUE ||
		type == PTR_TO_SOCK_COMMON;
}

static bool reg_type_may_be_null(enum bpf_reg_type type)
{
	return type == PTR_TO_MAP_VALUE_OR_NULL ||
	       type == PTR_TO_SOCKET_OR_NULL ||
	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
	       type == PTR_TO_TCP_SOCK_OR_NULL ||
	       type == PTR_TO_BTF_ID_OR_NULL ||
	       type == PTR_TO_MEM_OR_NULL ||
	       type == PTR_TO_RDONLY_BUF_OR_NULL ||
	       type == PTR_TO_RDWR_BUF_OR_NULL;
}

static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
	return reg->type == PTR_TO_MAP_VALUE &&
		map_value_has_spin_lock(reg->map_ptr);
}

static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCKET_OR_NULL ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_TCP_SOCK_OR_NULL ||
		type == PTR_TO_MEM ||
		type == PTR_TO_MEM_OR_NULL;
}

static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCK_COMMON;
}

static bool arg_type_may_be_null(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
	       type == ARG_PTR_TO_MEM_OR_NULL ||
	       type == ARG_PTR_TO_CTX_OR_NULL ||
	       type == ARG_PTR_TO_SOCKET_OR_NULL ||
	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
}

/* Determine whether the function releases some resources allocated by another
 * function call. The first reference type argument will be assumed to be
 * released by release_reference().
 */
static bool is_release_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_release ||
	       func_id == BPF_FUNC_ringbuf_submit ||
	       func_id == BPF_FUNC_ringbuf_discard;
}
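
/* Sketch of the acquire/release pairing encoded by is_release_function()
 * above and is_acquire_function() below, as seen from BPF program C code
 * ('rb' is a hypothetical BPF_MAP_TYPE_RINGBUF map):
 *
 *	void *rec = bpf_ringbuf_reserve(&rb, 8, 0);	// acquire: ref_obj_id assigned
 *	if (!rec)
 *		return 0;				// NULL branch owns nothing
 *	...
 *	bpf_ringbuf_submit(rec, 0);			// release: ref_obj_id dropped
 *
 * Reaching BPF_EXIT while such a reference is still live is rejected.
 */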

static bool may_be_acquire_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_lookup_tcp ||
		func_id == BPF_FUNC_sk_lookup_udp ||
		func_id == BPF_FUNC_skc_lookup_tcp ||
		func_id == BPF_FUNC_map_lookup_elem ||
	        func_id == BPF_FUNC_ringbuf_reserve;
}

static bool is_acquire_function(enum bpf_func_id func_id,
				const struct bpf_map *map)
{
	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;

	if (func_id == BPF_FUNC_sk_lookup_tcp ||
	    func_id == BPF_FUNC_sk_lookup_udp ||
	    func_id == BPF_FUNC_skc_lookup_tcp ||
	    func_id == BPF_FUNC_ringbuf_reserve)
		return true;

	if (func_id == BPF_FUNC_map_lookup_elem &&
	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
	     map_type == BPF_MAP_TYPE_SOCKHASH))
		return true;

	return false;
}

static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_tcp_sock ||
		func_id == BPF_FUNC_sk_fullsock ||
		func_id == BPF_FUNC_skc_to_tcp_sock ||
		func_id == BPF_FUNC_skc_to_tcp6_sock ||
		func_id == BPF_FUNC_skc_to_udp6_sock ||
		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
		func_id == BPF_FUNC_skc_to_tcp_request_sock;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
	[PTR_TO_FLOW_KEYS]	= "flow_keys",
	[PTR_TO_SOCKET]		= "sock",
	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
	[PTR_TO_SOCK_COMMON]	= "sock_common",
	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
	[PTR_TO_TCP_SOCK]	= "tcp_sock",
	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
	[PTR_TO_TP_BUFFER]	= "tp_buffer",
	[PTR_TO_XDP_SOCK]	= "xdp_sock",
	[PTR_TO_BTF_ID]		= "ptr_",
	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
	[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
	[PTR_TO_MEM]		= "mem",
	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
	[PTR_TO_RDWR_BUF]	= "rdwr_buf",
	[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
};

static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
	    verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
	if (live & REG_LIVE_DONE)
		verbose(env, "D");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

const char *kernel_type_name(u32 id)
{
	return btf_name_by_offset(btf_vmlinux,
				  btf_type_by_id(btf_vmlinux, id)->name_off);
}

static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str[t]);
		if (t == SCALAR_VALUE && reg->precise)
			verbose(env, "P");
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			if (t == PTR_TO_BTF_ID ||
			    t == PTR_TO_BTF_ID_OR_NULL ||
			    t == PTR_TO_PERCPU_BTF_ID)
				verbose(env, "%s", kernel_type_name(reg->btf_id));
			verbose(env, "(id=%d", reg->id);
			if (reg_type_may_be_refcounted_or_null(t))
				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
				if (reg->s32_min_value != reg->smin_value &&
				    reg->s32_min_value != S32_MIN)
					verbose(env, ",s32_min_value=%d",
						(int)(reg->s32_min_value));
				if (reg->s32_max_value != reg->smax_value &&
				    reg->s32_max_value != S32_MAX)
					verbose(env, ",s32_max_value=%d",
						(int)(reg->s32_max_value));
				if (reg->u32_min_value != reg->umin_value &&
				    reg->u32_min_value != U32_MIN)
					verbose(env, ",u32_min_value=%d",
						(int)(reg->u32_min_value));
				if (reg->u32_max_value != reg->umax_value &&
				    reg->u32_max_value != U32_MAX)
					verbose(env, ",u32_max_value=%d",
						(int)(reg->u32_max_value));
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (state->stack[i].slot_type[0] == STACK_SPILL) {
			reg = &state->stack[i].spilled_ptr;
			t = reg->type;
			verbose(env, "=%s", reg_type_str[t]);
			if (t == SCALAR_VALUE && reg->precise)
				verbose(env, "P");
			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
				verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			verbose(env, "=%s", types_buf);
		}
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	verbose(env, "\n");
}
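
/* For orientation, a single state line produced by print_verifier_state()
 * typically looks like (sketch only; the exact fields depend on register
 * type, liveness and the tracked bounds):
 *
 *   frame1: R1_w=ctx(id=0,off=0,imm=0) R6=inv(id=0) R10=fp0 fp-8_w=mmmm????
 *
 * i.e. an optional frame prefix, one entry per initialized register with its
 * liveness marks, and one entry per tracked stack slot.
 */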

#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
static int copy_##NAME##_state(struct bpf_func_state *dst,		\
			       const struct bpf_func_state *src)	\
{									\
	if (!src->FIELD)						\
		return 0;						\
	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
		/* internal bug, make state invalid to reject the program */ \
		memset(dst, 0, sizeof(*dst));				\
		return -EFAULT;						\
	}								\
	memcpy(dst->FIELD, src->FIELD,					\
	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
	return 0;							\
}
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN
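
/* For reference, the first instantiation above expands (roughly) to:
 *
 *	static int copy_reference_state(struct bpf_func_state *dst,
 *					const struct bpf_func_state *src)
 *	{
 *		if (!src->refs)
 *			return 0;
 *		if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *			memset(dst, 0, sizeof(*dst));
 *			return -EFAULT;
 *		}
 *		memcpy(dst->refs, src->refs,
 *		       sizeof(*src->refs) * src->acquired_refs);
 *		return 0;
 *	}
 */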

#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
				  bool copy_old)			\
{									\
	u32 old_size = state->COUNT;					\
	struct bpf_##NAME##_state *new_##FIELD;				\
	int slot = size / SIZE;						\
									\
	if (size <= old_size || !size) {				\
		if (copy_old)						\
			return 0;					\
		state->COUNT = slot * SIZE;				\
		if (!size && old_size) {				\
			kfree(state->FIELD);				\
			state->FIELD = NULL;				\
		}							\
		return 0;						\
	}								\
	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
				    GFP_KERNEL);			\
	if (!new_##FIELD)						\
		return -ENOMEM;						\
	if (copy_old) {							\
		if (state->FIELD)					\
			memcpy(new_##FIELD, state->FIELD,		\
			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
		memset(new_##FIELD + old_size / SIZE, 0,		\
		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
	}								\
	state->COUNT = slot * SIZE;					\
	kfree(state->FIELD);						\
	state->FIELD = new_##FIELD;					\
	return 0;							\
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN

/* do_check() starts with a zero-sized stack in struct bpf_verifier_state to
 * make it consume a minimal amount of memory. A check_stack_write() access from
 * the program calls into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which realloc_stack_state() copies over. It points to previous
 * bpf_verifier_state which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int stack_size,
			      int refs_size, bool copy_old)
{
	int err = realloc_reference_state(state, refs_size, copy_old);
	if (err)
		return err;
	return realloc_stack_state(state, stack_size, copy_old);
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = realloc_reference_state(state, state->acquired_refs + 1, true);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;

	return id;
}

/* release function corresponding to acquire_reference_state(). Idempotent. */
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EINVAL;
}

static int transfer_reference_state(struct bpf_func_state *dst,
				    struct bpf_func_state *src)
{
	int err = realloc_reference_state(dst, src->acquired_refs, false);
	if (err)
		return err;
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return 0;
}

static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	clear_jmp_history(state);
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
				 false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
	int i, err;

	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
		kfree(dst_state->jmp_history);
		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
		if (!dst_state->jmp_history)
			return -ENOMEM;
	}
	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames then src frame, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->speculative = src->speculative;
	dst_state->curframe = src->curframe;
	dst_state->active_spin_lock = src->active_spin_lock;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	while (st) {
		u32 br = --st->branches;

		/* WARN_ON(br > 1) technically makes sense here,
		 * but see comment in push_stack(), hence:
		 */
		WARN_ONCE((int)br < 0,
			  "BUG update_branch_counts:branches_to_explore=%d\n",
			  br);
		if (br)
			break;
		st = st->parent;
	}
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (pop_log)
		bpf_vlog_reset(&env->log, head->log_pos);
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.len_used;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		goto err;
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 * instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 * a new state for a sequence of branches and all such current
		 * and cloned states will be pointing to a single parent state
		 * which might have large 'branches' count.
		 */
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL, false));
	return NULL;
}
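
/* Sketch of how the two helpers above drive branch exploration (the call
 * sites live in check_cond_jmp_op() and do_check(); shown here only for
 * orientation): on a conditional jump the other path is queued with
 *
 *	other_branch = push_stack(env, *insn_idx + insn->off + 1,
 *				  *insn_idx, false);
 *
 * verification continues down the fall-through path, and once that path
 * reaches BPF_EXIT the queued state is resumed via
 *
 *	pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log);
 */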

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg);

/* This helper doesn't clear reg->id */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;

	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	___mark_reg_known(reg, imm);
}

static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const_subreg(reg->var_off, imm);
	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}

/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;

	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
{
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

static void __update_reg32_bounds(struct bpf_reg_state *reg)
{
	struct tnum var32_off = tnum_subreg(reg->var_off);

	/* min signed is max(sign bit) | min(other bits) */
	reg->s32_min_value = max_t(s32, reg->s32_min_value,
			var32_off.value | (var32_off.mask & S32_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->s32_max_value = min_t(s32, reg->s32_max_value,
			var32_off.value | (var32_off.mask & S32_MAX));
	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
	reg->u32_max_value = min(reg->u32_max_value,
				 (u32)(var32_off.value | var32_off.mask));
}

static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = reg->u32_max_value =
			min_t(u32, reg->s32_max_value, reg->u32_max_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s32)reg->u32_max_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->s32_min_value = reg->u32_min_value;
		reg->s32_max_value = reg->u32_max_value =
			min_t(u32, reg->s32_max_value, reg->u32_max_value);
	} else if ((s32)reg->u32_min_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = reg->u32_max_value;
	}
}

static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	__reg32_deduce_bounds(reg);
	__reg64_deduce_bounds(reg);
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	struct tnum var64_off = tnum_intersect(reg->var_off,
					       tnum_range(reg->umin_value,
							  reg->umax_value));
	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
						tnum_range(reg->u32_min_value,
							   reg->u32_max_value));

	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
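
/* Worked example for __reg_bound_offset() (sketch): starting from a fully
 * unknown var_off with umin_value == u32_min_value == 4 and
 * umax_value == u32_max_value == 7, both tnum_range() calls yield
 * (value=0x4; mask=0x3), i.e. bit 2 known set and all higher bits known
 * zero.  The intersections and the final tnum_or() then leave
 * var_off == (0x4; 0x3), so only the low two bits remain unknown.
 */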

static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
{
	reg->umin_value = reg->u32_min_value;
	reg->umax_value = reg->u32_max_value;
	/* Attempt to pull 32-bit signed bounds into 64-bit bounds
	 * but must be positive otherwise set to worse case bounds
	 * and refine later from tnum.
	 */
	if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
		reg->smax_value = reg->s32_max_value;
	else
		reg->smax_value = U32_MAX;
	if (reg->s32_min_value >= 0)
		reg->smin_value = reg->s32_min_value;
	else
		reg->smin_value = 0;
}

static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
{
	/* special case when the 64-bit register has its upper 32 bits zeroed.
	 * Typically happens after a zext or <<32, >>32 sequence, allowing us
	 * to use the 32-bit bounds directly.
	 */
	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
		__reg_assign_32_into_64(reg);
	} else {
		/* Otherwise the best we can do is push lower 32bit known and
		 * unknown bits into register (var_off set from jmp logic)
		 * then learn as much as possible from the 64-bit tnum
		 * known and unknown bits. The previous smin/smax bounds are
		 * invalid here because of jmp32 compare so mark them unknown
		 * so they do not impact tnum bounds calculation.
		 */
		__mark_reg64_unbounded(reg);
		__update_reg_bounds(reg);
	}

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__reg_deduce_bounds(reg);
	__reg_bound_offset(reg);
	__update_reg_bounds(reg);
}

static bool __reg64_bound_s32(s64 a)
{
	if (a > S32_MIN && a < S32_MAX)
		return true;
	return false;
}

static bool __reg64_bound_u32(u64 a)
{
	if (a > U32_MIN && a < U32_MAX)
		return true;
	return false;
}

static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
	__mark_reg32_unbounded(reg);

	if (__reg64_bound_s32(reg->smin_value))
		reg->s32_min_value = (s32)reg->smin_value;
	if (__reg64_bound_s32(reg->smax_value))
		reg->s32_max_value = (s32)reg->smax_value;
	if (__reg64_bound_u32(reg->umin_value))
		reg->u32_min_value = (u32)reg->umin_value;
	if (__reg64_bound_u32(reg->umax_value))
		reg->u32_max_value = (u32)reg->umax_value;

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__reg_deduce_bounds(reg);
	__reg_bound_offset(reg);
	__update_reg_bounds(reg);
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_unknown(env, regs + regno);
}

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_not_init(env, regs + regno);
}

static void mark_btf_ld_reg(struct bpf_verifier_env *env,
			    struct bpf_reg_state *regs, u32 regno,
			    enum bpf_reg_type reg_type, u32 btf_id)
{
	if (reg_type == SCALAR_VALUE) {
		mark_reg_unknown(env, regs, regno);
		return;
	}
	mark_reg_known_zero(env, regs, regno);
	regs[regno].type = PTR_TO_BTF_ID;
	regs[regno].btf_id = btf_id;
}

#define DEF_NOT_SUBREG	(0)
static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
		regs[i].subreg_def = DEF_NOT_SUBREG;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};

static int cmp_subprogs(const void *a, const void *b)
{
	return ((struct bpf_subprog_info *)a)->start -
	       ((struct bpf_subprog_info *)b)->start;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	struct bpf_subprog_info *p;

	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
		    sizeof(env->subprog_info[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_info;

}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return 0;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	env->subprog_info[env->subprog_cnt++].start = off;
	sort(env->subprog_info, env->subprog_cnt,
	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
	return 0;
}

static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret < 0)
		return ret;

	/* determine subprog starts. The end is one before the next starts */
	for (i = 0; i < insn_cnt; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		if (!env->bpf_capable) {
			verbose(env,
				"function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
			return -EPERM;
		}
		ret = add_subprog(env, i + insn[i].imm + 1);
		if (ret < 0)
			return ret;
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (code == (BPF_JMP | BPF_CALL) &&
		    insn[i].imm == BPF_FUNC_tail_call &&
		    insn[i].src_reg != BPF_PSEUDO_CALL)
			subprog[cur_subprog].has_tail_call = true;
		if (BPF_CLASS(code) == BPF_LD &&
		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
			subprog[cur_subprog].has_ld_abs = true;
		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}
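
/* Example layout after check_subprogs() (sketch): for a 10-insn program
 * whose insn 3 is a BPF_PSEUDO_CALL with imm == 2, the result is
 *
 *	subprog_info[0].start = 0;	// main
 *	subprog_info[1].start = 6;	// callee at insn 3 + 2 + 1
 *	subprog_info[2].start = 10;	// fake 'exit' subprog, == prog->len
 *
 * with env->subprog_cnt == 2 (the fake entry is not counted).
 */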

/* Parentage chain of this register (or stack slot) should take care of all
 * issues like callee-saved registers, stack slot allocation time, etc.
 */
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *state,
			 struct bpf_reg_state *parent, u8 flag)
{
	bool writes = parent == state->parent; /* Observe write marks */
	int cnt = 0;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->live & REG_LIVE_WRITTEN)
			break;
		if (parent->live & REG_LIVE_DONE) {
			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
				reg_type_str[parent->type],
				parent->var_off.value, parent->off);
			return -EFAULT;
		}
		/* The first condition is more likely to be true than the
		 * second, so check it first.
		 */
		if ((parent->live & REG_LIVE_READ) == flag ||
		    parent->live & REG_LIVE_READ64)
			/* The parentage chain never changes and
			 * this parent was already marked as LIVE_READ.
			 * There is no need to keep walking the chain again and
			 * keep re-marking all parents as LIVE_READ.
			 * This case happens when the same register is read
			 * multiple times without writes into it in-between.
			 * Also, if parent has the stronger REG_LIVE_READ64 set,
			 * then no need to set the weak REG_LIVE_READ32.
			 */
			break;
		/* ... then we depend on parent's value */
		parent->live |= flag;
		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
		if (flag == REG_LIVE_READ64)
			parent->live &= ~REG_LIVE_READ32;
		state = parent;
		parent = state->parent;
		writes = true;
		cnt++;
	}

	if (env->longest_mark_read_walk < cnt)
		env->longest_mark_read_walk = cnt;
	return 0;
}
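
/* Sketch of the liveness propagation above: a 64-bit read upgrades a parent
 * that so far only saw a 32-bit read, e.g.
 *
 *	parent->live = REG_LIVE_READ32;
 *	mark_reg_read(env, reg, parent, REG_LIVE_READ64);
 *	// parent->live now has REG_LIVE_READ64 set and REG_LIVE_READ32 cleared
 *
 * (assuming the read is not screened by a REG_LIVE_WRITTEN mark on the
 * child), while a write mark stops the read from propagating further up the
 * parentage chain.
 */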

/* This function is supposed to be used by the following 32-bit optimization
 * code only. It returns TRUE if the source or destination register operates
 * on 64-bit, otherwise return FALSE.
 */
static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
{
	u8 code, class, op;

	code = insn->code;
	class = BPF_CLASS(code);
	op = BPF_OP(code);
	if (class == BPF_JMP) {
		/* BPF_EXIT for "main" will reach here. Return TRUE
		 * conservatively.
		 */
		if (op == BPF_EXIT)
			return true;
		if (op == BPF_CALL) {
			/* BPF to BPF call will reach here because of marking
			 * caller saved clobber with DST_OP_NO_MARK for which we
			 * don't care about the register def because they are anyway
			 * marked as NOT_INIT already.
			 */
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return false;
			/* Helper call will reach here because of arg type
			 * check, conservatively return TRUE.
			 */
			if (t == SRC_OP)
				return true;

			return false;
		}
	}

	if (class == BPF_ALU64 || class == BPF_JMP ||
	    /* BPF_END always uses BPF_ALU class. */
	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
		return true;

	if (class == BPF_ALU || class == BPF_JMP32)
		return false;

	if (class == BPF_LDX) {
		if (t != SRC_OP)
			return BPF_SIZE(code) == BPF_DW;
		/* LDX source must be ptr. */
		return true;
	}

	if (class == BPF_STX) {
		if (reg->type != SCALAR_VALUE)
			return true;
		return BPF_SIZE(code) == BPF_DW;
	}

	if (class == BPF_LD) {
		u8 mode = BPF_MODE(code);

		/* LD_IMM64 */
		if (mode == BPF_IMM)
			return true;

		/* Both LD_IND and LD_ABS return 32-bit data. */
		if (t != SRC_OP)
			return  false;

		/* Implicit ctx ptr. */
		if (regno == BPF_REG_6)
			return true;

		/* Explicit source could be any width. */
		return true;
	}

	if (class == BPF_ST)
		/* The only source register for BPF_ST is a ptr. */
		return true;

	/* Conservatively return true at default. */
	return true;
}

/* Return TRUE if INSN doesn't have explicit value define. */
static bool insn_no_def(struct bpf_insn *insn)
{
	u8 class = BPF_CLASS(insn->code);

	return (class == BPF_JMP || class == BPF_JMP32 ||
		class == BPF_STX || class == BPF_ST);
}

/* Return TRUE if INSN has defined any 32-bit value explicitly. */
static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	if (insn_no_def(insn))
		return false;

	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
}

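/* Mark the insn that last defined this register's 32-bit sub-register, so
 * that a zero-extension can be patched in after it by the later 32-bit
 * optimization pass.
 */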
static void mark_insn_zext(struct bpf_verifier_env *env,
			   struct bpf_reg_state *reg)
{
	s32 def_idx = reg->subreg_def;

	if (def_idx == DEF_NOT_SUBREG)
		return;

	env->insn_aux_data[def_idx - 1].zext_dst = true;
	/* The dst will be zero extended, so won't be sub-register anymore. */
	reg->subreg_def = DEF_NOT_SUBREG;
}

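/* Check that 'regno' is a valid register for the current insn and record its
 * liveness: a source operand must be initialized and is propagated as a read
 * to parent states, a destination operand is marked as written here.
 */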
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
	struct bpf_reg_state *reg, *regs = state->regs;
	bool rw64;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	reg = &regs[regno];
	rw64 = is_reg64(env, insn, regno, reg, t);
	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (reg->type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno == BPF_REG_FP)
			return 0;

		if (rw64)
			mark_insn_zext(env, reg);

		return mark_reg_read(env, reg, reg->parent,
				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		reg->live |= REG_LIVE_WRITTEN;
		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *cur)
{
	u32 cnt = cur->jmp_history_cnt;
	struct bpf_idx_pair *p;

	cnt++;
	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
	if (!p)
		return -ENOMEM;
	p[cnt - 1].idx = env->insn_idx;
	p[cnt - 1].prev_idx = env->prev_insn_idx;
	cur->jmp_history = p;
	cur->jmp_history_cnt = cnt;
	return 0;
}

/* Backtrack one insn at a time. If idx is not at the top of recorded
 * history then previous instruction came from straight line execution.
 */
static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
			     u32 *history)
{
	u32 cnt = *history;

	if (cnt && st->jmp_history[cnt - 1].idx == i) {
		i = st->jmp_history[cnt - 1].prev_idx;
		(*history)--;
	} else {
		i--;
	}
	return i;
}

/* For given verifier state backtrack_insn() is called from the last insn to
 * the first insn. Its purpose is to compute a bitmask of registers and
 * stack slots that needs precision in the parent verifier state.
 */
static int backtrack_insn(struct bpf_verifier_env *env, int idx,
			  u32 *reg_mask, u64 *stack_mask)
{
	const struct bpf_insn_cbs cbs = {
		.cb_print	= verbose,
		.private_data	= env,
	};
	struct bpf_insn *insn = env->prog->insnsi + idx;
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);
	u8 mode = BPF_MODE(insn->code);
	u32 dreg = 1u << insn->dst_reg;
	u32 sreg = 1u << insn->src_reg;
	u32 spi;

	if (insn->code == 0)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
		verbose(env, "%d: ", idx);
		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
	}

	if (class == BPF_ALU || class == BPF_ALU64) {
		if (!(*reg_mask & dreg))
			return 0;
		if (opcode == BPF_MOV) {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg = sreg
				 * dreg needs precision after this insn
				 * sreg needs precision before this insn
				 */
				*reg_mask &= ~dreg;
				*reg_mask |= sreg;
			} else {
				/* dreg = K
				 * dreg needs precision after this insn.
				 * Corresponding register is already marked
				 * as precise=true in this verifier state.
				 * No further markings in parent are necessary
				 */
				*reg_mask &= ~dreg;
			}
		} else {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg += sreg
				 * both dreg and sreg need precision
				 * before this insn
				 */
				*reg_mask |= sreg;
			} /* else dreg += K
			   * dreg still needs precision before this insn
			   */
		}
	} else if (class == BPF_LDX) {
		if (!(*reg_mask & dreg))
			return 0;
		*reg_mask &= ~dreg;

		/* scalars can only be spilled into stack w/o losing precision.
		 * Load from any other memory can be zero extended.
		 * The desire to keep that precision is already indicated
		 * by 'precise' mark in corresponding register of this state.
		 * No further tracking necessary.
		 */
		if (insn->src_reg != BPF_REG_FP)
			return 0;
		if (BPF_SIZE(insn->code) != BPF_DW)
			return 0;

		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
		 * that [fp - off] slot contains scalar that needs to be
		 * tracked with precision
		 */
		spi = (-insn->off - 1) / BPF_REG_SIZE;
		if (spi >= 64) {
			verbose(env, "BUG spi %d\n", spi);
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}
		*stack_mask |= 1ull << spi;
	} else if (class == BPF_STX || class == BPF_ST) {
		if (*reg_mask & dreg)
			/* stx & st shouldn't be using _scalar_ dst_reg
			 * to access memory. It means backtracking
			 * encountered a case of pointer subtraction.
			 */
			return -ENOTSUPP;
		/* scalars can only be spilled into stack */
		if (insn->dst_reg != BPF_REG_FP)
			return 0;
		if (BPF_SIZE(insn->code) != BPF_DW)
			return 0;
		spi = (-insn->off - 1) / BPF_REG_SIZE;
		if (spi >= 64) {
			verbose(env, "BUG spi %d\n", spi);
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}
		if (!(*stack_mask & (1ull << spi)))
			return 0;
		*stack_mask &= ~(1ull << spi);
		if (class == BPF_STX)
			*reg_mask |= sreg;
	} else if (class == BPF_JMP || class == BPF_JMP32) {
		if (opcode == BPF_CALL) {
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return -ENOTSUPP;
			/* regular helper call sets R0 */
			*reg_mask &= ~1;
			if (*reg_mask & 0x3f) {
				/* if backtracing was looking for registers R1-R5
				 * they should have been found already.
				 */
				verbose(env, "BUG regs %x\n", *reg_mask);
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}
		} else if (opcode == BPF_EXIT) {
			return -ENOTSUPP;
		}
	} else if (class == BPF_LD) {
		if (!(*reg_mask & dreg))
			return 0;
		*reg_mask &= ~dreg;
		/* It's ld_imm64 or ld_abs or ld_ind.
		 * For ld_imm64 no further tracking of precision
		 * into parent is necessary
		 */
		if (mode == BPF_IND || mode == BPF_ABS)
			/* to be analyzed */
			return -ENOTSUPP;
	}
	return 0;
}

/* the scalar precision tracking algorithm:
 * . at the start all registers have precise=false.
 * . scalar ranges are tracked as normal through alu and jmp insns.
 * . once precise value of the scalar register is used in:
 *   .  ptr + scalar alu
 *   . if (scalar cond K|scalar)
 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
 *   backtrack through the verifier states and mark all registers and
 *   stack slots with spilled constants that these scalar registers
 *   should be precise.
 * . during state pruning two registers (or spilled stack slots)
 *   are equivalent if both are not precise.
 *
 * Note the verifier cannot simply walk register parentage chain,
 * since many different registers and stack slots could have been
 * used to compute single precise scalar.
 *
 * The approach of starting with precise=true for all registers and then
 * backtrack to mark a register as not precise when the verifier detects
 * that program doesn't care about specific value (e.g., when helper
 * takes register as ARG_ANYTHING parameter) is not safe.
 *
 * It's ok to walk single parentage chain of the verifier states.
 * It's possible that this backtracking will go all the way till 1st insn.
 * All other branches will be explored for needing precision later.
 *
 * The backtracking needs to deal with cases like:
 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 * r9 -= r8
 * r5 = r9
 * if r5 > 0x79f goto pc+7
 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 * r5 += 1
 * ...
 * call bpf_perf_event_output#25
 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 *
 * and this case:
 * r6 = 1
 * call foo // uses callee's r6 inside to compute r0
 * r0 += r6
 * if r0 == 0 goto
 *
 * to track above reg_mask/stack_mask needs to be independent for each frame.
 *
 * Also if parent's curframe > frame where backtracking started,
 * the verifier need to mark registers in both frames, otherwise callees
 * may incorrectly prune callers. This is similar to
 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
 *
 * For now backtracking falls back into conservative marking.
 */
static void mark_all_scalars_precise(struct bpf_verifier_env *env,
				     struct bpf_verifier_state *st)
{
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	int i, j;

	/* big hammer: mark all scalars precise in this path.
	 * pop_stack may still get !precise scalars.
	 */
	for (; st; st = st->parent)
		for (i = 0; i <= st->curframe; i++) {
			func = st->frame[i];
			for (j = 0; j < BPF_REG_FP; j++) {
				reg = &func->regs[j];
				if (reg->type != SCALAR_VALUE)
					continue;
				reg->precise = true;
			}
			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
				if (func->stack[j].slot_type[0] != STACK_SPILL)
					continue;
				reg = &func->stack[j].spilled_ptr;
				if (reg->type != SCALAR_VALUE)
					continue;
				reg->precise = true;
			}
		}
}

static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
				  int spi)
{
	struct bpf_verifier_state *st = env->cur_state;
	int first_idx = st->first_insn_idx;
	int last_idx = env->insn_idx;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
	bool skip_first = true;
	bool new_marks = false;
	int i, err;

	if (!env->bpf_capable)
		return 0;

	func = st->frame[st->curframe];
	if (regno >= 0) {
		reg = &func->regs[regno];
		if (reg->type != SCALAR_VALUE) {
			WARN_ONCE(1, "backtracing misuse");
			return -EFAULT;
		}
		if (!reg->precise)
			new_marks = true;
		else
			reg_mask = 0;
		reg->precise = true;
	}

	while (spi >= 0) {
		if (func->stack[spi].slot_type[0] != STACK_SPILL) {
			stack_mask = 0;
			break;
		}
		reg = &func->stack[spi].spilled_ptr;
		if (reg->type != SCALAR_VALUE) {
			stack_mask = 0;
			break;
		}
		if (!reg->precise)
			new_marks = true;
		else
			stack_mask = 0;
		reg->precise = true;
		break;
	}

	if (!new_marks)
		return 0;
	if (!reg_mask && !stack_mask)
		return 0;
	for (;;) {
		DECLARE_BITMAP(mask, 64);
		u32 history = st->jmp_history_cnt;

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
		for (i = last_idx;;) {
			if (skip_first) {
				err = 0;
				skip_first = false;
			} else {
				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
			}
			if (err == -ENOTSUPP) {
				mark_all_scalars_precise(env, st);
				return 0;
			} else if (err) {
				return err;
			}
			if (!reg_mask && !stack_mask)
				/* Found assignment(s) into tracked register in this state.
				 * Since this state is already marked, just return.
				 * Nothing to be tracked further in the parent state.
				 */
				return 0;
			if (i == first_idx)
				break;
			i = get_prev_insn_idx(st, i, &history);
			if (i >= env->prog->len) {
				/* This can happen if backtracking reached insn 0
				 * and there are still reg_mask or stack_mask
				 * to backtrack.
				 * It means the backtracking missed the spot where
				 * particular register was initialized with a constant.
				 */
				verbose(env, "BUG backtracking idx %d\n", i);
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}
		}
		st = st->parent;
		if (!st)
			break;

		new_marks = false;
		func = st->frame[st->curframe];
		bitmap_from_u64(mask, reg_mask);
		for_each_set_bit(i, mask, 32) {
			reg = &func->regs[i];
			if (reg->type != SCALAR_VALUE) {
				reg_mask &= ~(1u << i);
				continue;
			}
			if (!reg->precise)
				new_marks = true;
			reg->precise = true;
		}

		bitmap_from_u64(mask, stack_mask);
		for_each_set_bit(i, mask, 64) {
			if (i >= func->allocated_stack / BPF_REG_SIZE) {
				/* the sequence of instructions:
				 * 2: (bf) r3 = r10
				 * 3: (7b) *(u64 *)(r3 -8) = r0
				 * 4: (79) r4 = *(u64 *)(r10 -8)
				 * doesn't contain jmps. It's backtracked
				 * as a single block.
				 * During backtracking insn 3 is not recognized as
				 * stack access, so at the end of backtracking
				 * stack slot fp-8 is still marked in stack_mask.
				 * However the parent state may not have accessed
				 * fp-8 and it's "unallocated" stack space.
				 * In such case fallback to conservative.
				 */
				mark_all_scalars_precise(env, st);
				return 0;
			}

			if (func->stack[i].slot_type[0] != STACK_SPILL) {
				stack_mask &= ~(1ull << i);
				continue;
			}
			reg = &func->stack[i].spilled_ptr;
			if (reg->type != SCALAR_VALUE) {
				stack_mask &= ~(1ull << i);
				continue;
			}
			if (!reg->precise)
				new_marks = true;
			reg->precise = true;
		}
		if (env->log.level & BPF_LOG_LEVEL) {
			print_verifier_state(env, func);
			verbose(env, "parent %s regs=%x stack=%llx marks\n",
				new_marks ? "didn't have" : "already had",
				reg_mask, stack_mask);
		}

		if (!reg_mask && !stack_mask)
			break;
		if (!new_marks)
			break;

		last_idx = st->last_insn_idx;
		first_idx = st->first_insn_idx;
	}
	return 0;
}

static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
	return __mark_chain_precision(env, regno, -1);
}

static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
{
	return __mark_chain_precision(env, -1, spi);
}

static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_VALUE_OR_NULL:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID_OR_NULL:
	case PTR_TO_RDONLY_BUF:
	case PTR_TO_RDONLY_BUF_OR_NULL:
	case PTR_TO_RDWR_BUF:
	case PTR_TO_RDWR_BUF_OR_NULL:
	case PTR_TO_PERCPU_BTF_ID:
		return true;
	default:
		return false;
	}
}

/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}

static bool register_is_const(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}

static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
{
	return tnum_is_unknown(reg->var_off) &&
	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
}

static bool register_is_bounded(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
}

static bool __is_pointer_value(bool allow_ptr_leaks,
			       const struct bpf_reg_state *reg)
{
	if (allow_ptr_leaks)
		return false;

	return reg->type != SCALAR_VALUE;
}

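/* Copy the full state of the register being spilled into the stack slot and
 * mark all BPF_REG_SIZE bytes of that slot as STACK_SPILL.
 */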
static void save_register_state(struct bpf_func_state *state,
				int spi, struct bpf_reg_state *reg)
{
	int i;

	state->stack[spi].spilled_ptr = *reg;
	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

	for (i = 0; i < BPF_REG_SIZE; i++)
		state->stack[spi].slot_type[i] = STACK_SPILL;
}

/* check_stack_read/write functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
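/* A typical spill/fill pair looks like (pseudo BPF asm):
 *   *(u64 *)(r10 - 8) = r6    ; spill r6 into stack slot fp-8
 *   r6 = *(u64 *)(r10 - 8)    ; fill it back, restoring r6's tracked state
 */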
static int check_stack_write(struct bpf_verifier_env *env,
			     struct bpf_func_state *state, /* func where register points to */
			     int off, int size, int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
	struct bpf_reg_state *reg = NULL;

	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
				 state->acquired_refs, true);
	if (err)
		return err;
	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    state->stack[spi].slot_type[0] == STACK_SPILL &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];

	if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
	    !register_is_null(reg) && env->bpf_capable) {
		if (dst_reg != BPF_REG_FP) {
			/* The backtracking logic can only recognize explicit
			 * stack slot address like [fp - 8]. Other spill of
			 * scalar via a different register has to be conservative.
			 * Backtrack from here and mark all registers as precise
			 * that contributed into 'reg' being a constant.
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
		}
		save_register_state(state, spi, reg);
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}

		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}

		if (!env->bypass_spec_v4) {
			bool sanitize = false;

			if (state->stack[spi].slot_type[0] == STACK_SPILL &&
			    register_is_const(&state->stack[spi].spilled_ptr))
				sanitize = true;
			for (i = 0; i < BPF_REG_SIZE; i++)
				if (state->stack[spi].slot_type[i] == STACK_MISC) {
					sanitize = true;
					break;
				}
			if (sanitize) {
				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
				int soff = (-spi - 1) * BPF_REG_SIZE;

				/* detected reuse of integer stack slot with a pointer
				 * which means either llvm is reusing stack slot or
				 * an attacker is trying to exploit CVE-2018-3639
				 * (speculative store bypass)
				 * Have to sanitize that slot with preemptive
				 * store of zero.
				 */
				if (*poff && *poff != soff) {
					/* disallow programs where single insn stores
					 * into two different stack slots, since verifier
					 * cannot sanitize them
					 */
					verbose(env,
						"insn %d cannot access two stack slots fp%d and fp%d",
						insn_idx, *poff, soff);
					return -EINVAL;
				}
				*poff = soff;
			}
		}
		save_register_state(state, spi, reg);
	} else {
		u8 type = STACK_MISC;

		/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;
		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
		if (state->stack[spi].slot_type[0] == STACK_SPILL)
			for (i = 0; i < BPF_REG_SIZE; i++)
				state->stack[spi].slot_type[i] = STACK_MISC;

		/* only mark the slot as written if all 8 bytes were written
		 * otherwise read propagation may incorrectly stop too soon
		 * when stack slots are partially written.
		 * This heuristic means that read propagation will be
		 * conservative, since it will add reg_live_read marks
		 * to stack slots all the way to the first state when a program
		 * writes+reads less than 8 bytes
		 */
		if (size == BPF_REG_SIZE)
			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		/* when we zero initialize stack slots mark them as such */
		if (reg && register_is_null(reg)) {
			/* backtracking doesn't work for STACK_ZERO yet. */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
				type;
	}
	return 0;
}

static int check_stack_read(struct bpf_verifier_env *env,
			    struct bpf_func_state *reg_state /* func where register points to */,
			    int off, int size, int value_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	struct bpf_reg_state *reg;
	u8 *stype;

	if (reg_state->allocated_stack <= slot) {
		verbose(env, "invalid read from stack off %d+0 size %d\n",
			off, size);
		return -EACCES;
	}
	stype = reg_state->stack[spi].slot_type;
	reg = &reg_state->stack[spi].spilled_ptr;

	if (stype[0] == STACK_SPILL) {
		if (size != BPF_REG_SIZE) {
			if (reg->type != SCALAR_VALUE) {
				verbose_linfo(env, env->insn_idx, "; ");
				verbose(env, "invalid size of register fill\n");
				return -EACCES;
			}
			if (value_regno >= 0) {
				mark_reg_unknown(env, state->regs, value_regno);
				state->regs[value_regno].live |= REG_LIVE_WRITTEN;
			}
			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
			return 0;
		}
		for (i = 1; i < BPF_REG_SIZE; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
				verbose(env, "corrupted spill memory\n");
				return -EACCES;
			}
		}

		if (value_regno >= 0) {
			/* restore register state from stack */
			state->regs[value_regno] = *reg;
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
			/* If value_regno==-1, the caller is asking us whether
			 * it is acceptable to use this value as a SCALAR_VALUE
			 * (e.g. for XADD).
			 * We must not allow unprivileged callers to do that
			 * with spilled pointers.
			 */
			verbose(env, "leaking pointer from stack off %d\n",
				off);
			return -EACCES;
		}
		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
	} else {
		int zeros = 0;

		for (i = 0; i < size; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
				continue;
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
				zeros++;
				continue;
			}
			verbose(env, "invalid read from stack off %d+%d size %d\n",
				off, i, size);
			return -EACCES;
		}
		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
		if (value_regno >= 0) {
			if (zeros == size) {
				/* any size read into register is zero extended,
				 * so the whole register == const_zero
				 */
				__mark_reg_const_zero(&state->regs[value_regno]);
				/* backtracking doesn't support STACK_ZERO yet,
				 * so mark it precise here, so that later
				 * backtracking can stop here.
				 * Backtracking may not need this if this register
				 * doesn't participate in pointer adjustment.
				 * Forward propagation of precise flag is not
				 * necessary either. This mark is only to stop
				 * backtracking. Any register that contributed
				 * to const 0 was marked precise before spill.
				 */
				state->regs[value_regno].precise = true;
			} else {
				/* have read misc data from the stack */
				mark_reg_unknown(env, state->regs, value_regno);
			}
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
	}
	return 0;
}

static int check_stack_access(struct bpf_verifier_env *env,
			      const struct bpf_reg_state *reg,
			      int off, int size)
{
	/* Stack accesses must be at a fixed offset, so that we
	 * can determine what type of data were returned. See
	 * check_stack_read().
	 */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
			tn_buf, off, size);
		return -EACCES;
	}

	if (off >= 0 || off < -MAX_BPF_STACK) {
		verbose(env, "invalid stack off=%d size=%d\n", off, size);
		return -EACCES;
	}

	return 0;
}

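/* Enforce the map's read/write capability (derived from flags such as
 * BPF_F_RDONLY_PROG / BPF_F_WRONLY_PROG) for a direct map value access.
 */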
static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
				 int off, int size, enum bpf_access_type type)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_map *map = regs[regno].map_ptr;
	u32 cap = bpf_map_flags_to_cap(map);

	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}

	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}

	return 0;
}

/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
static int __check_mem_access(struct bpf_verifier_env *env, int regno,
			      int off, int size, u32 mem_size,
			      bool zero_size_allowed)
{
	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
	struct bpf_reg_state *reg;

	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
		return 0;

	reg = &cur_regs(env)[regno];
	switch (reg->type) {
	case PTR_TO_MAP_VALUE:
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, off, mem_size);
		break;
	case PTR_TO_MEM:
	default:
		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
			mem_size, off, size);
	}

	return -EACCES;
}

/* check read/write into a memory region with possible variable offset */
static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
				   int off, int size, u32 mem_size,
				   bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register pointing to memory region, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level & BPF_LOG_LEVEL)
		print_verifier_state(env, state);

	/* The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	      reg->smin_value + off < 0)) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->smin_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->umax_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d max value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	return 0;
}

/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	struct bpf_map *map = reg->map_ptr;
	int err;

	err = check_mem_region_access(env, regno, off, size, map->value_size,
				      zero_size_allowed);
	if (err)
		return err;

	if (map_value_has_spin_lock(map)) {
		u32 lock = map->spin_lock_off;

		/* if any part of struct bpf_spin_lock can be touched by
		 * load/store reject this program.
		 * To check that [x1, x2) overlaps with [y1, y2)
		 * it is sufficient to check x1 < y2 && y1 < x2.
		 */
		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
		     lock < reg->umax_value + off + size) {
			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
			return -EACCES;
		}
	}
	return err;
}

#define MAX_PACKET_OFF 0xffff

static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
{
	return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
}

static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
				       const struct bpf_call_arg_meta *meta,
				       enum bpf_access_type t)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	switch (prog_type) {
	/* Program types only with direct read access go here! */
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (t == BPF_WRITE)
			return false;
		fallthrough;

	/* Program types with direct read + write access go here! */
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		if (meta)
			return meta->pkt_access;

		env->seen_direct_write = true;
		return true;

	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		if (t == BPF_WRITE)
			env->seen_direct_write = true;

		return true;

	default:
		return false;
	}
}

static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
			       int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	int err;

	/* We may have added a variable offset to the packet pointer; but any
	 * reg->range we have comes after that.  We are only checking the fixed
	 * offset.
	 */

	/* We don't allow negative numbers, because we aren't tracking enough
	 * detail to prove they're safe.
	 */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, off, size, reg->range,
				 zero_size_allowed);
	if (err) {
		verbose(env, "R%d offset is outside of the packet\n", regno);
		return err;
	}

	/* __check_mem_access has made sure "off + size - 1" is within u16.
	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
	 * otherwise find_good_pkt_pointers would have refused to set range info
	 * and __check_mem_access would have rejected this pkt access.
	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
	 */
	env->prog->aux->max_pkt_offset =
		max_t(u32, env->prog->aux->max_pkt_offset,
		      off + reg->umax_value + size - 1);

	return err;
}

/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
			    u32 *btf_id)
{
	struct bpf_insn_access_aux info = {
		.reg_type = *reg_type,
		.log = &env->log,
	};

	if (env->ops->is_valid_access &&
	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
		/* A non zero info.ctx_field_size indicates that this field is a
		 * candidate for later verifier transformation to load the whole
		 * field and then apply a mask when accessed with a narrower
		 * access than actual ctx access size. A zero info.ctx_field_size
		 * will only allow for whole field access and rejects any other
		 * type of narrower access.
		 */
		*reg_type = info.reg_type;

		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
			*btf_id = info.btf_id;
		else
			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
		/* remember the offset of last byte accessed in ctx */
		if (env->prog->aux->max_ctx_offset < off + size)
			env->prog->aux->max_ctx_offset = off + size;
		return 0;
	}

	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
	return -EACCES;
}

static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
				  int size)
{
	if (size < 0 || off < 0 ||
	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
		verbose(env, "invalid access to flow keys off=%d size=%d\n",
			off, size);
		return -EACCES;
	}
	return 0;
}

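/* check access to fields of a socket pointer; which offsets and sizes are
 * valid depends on the concrete socket type held in the register
 */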
static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
			     u32 regno, int off, int size,
			     enum bpf_access_type t)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	struct bpf_insn_access_aux info = {};
	bool valid;

	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}

	switch (reg->type) {
	case PTR_TO_SOCK_COMMON:
		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_SOCKET:
		valid = bpf_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_TCP_SOCK:
		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_XDP_SOCK:
		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
		break;
	default:
		valid = false;
	}


	if (valid) {
		env->insn_aux_data[insn_idx].ctx_field_size =
			info.ctx_field_size;
		return 0;
	}

	verbose(env, "R%d invalid %s access off=%d size=%d\n",
		regno, reg_type_str[reg->type], off, size);

	return -EACCES;
}

static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
	return cur_regs(env) + regno;
}

static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
}

static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	return reg->type == PTR_TO_CTX;
}

static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	return type_is_sk_pointer(reg->type);
}

static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	return type_is_pkt_pointer(reg->type);
}

static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
	return reg->type == PTR_TO_FLOW_KEYS;
}

static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg,
				   int off, int size, bool strict)
{
	struct tnum reg_off;
	int ip_align;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	/* For platforms that do not have a Kconfig enabling
	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
	 * to this code only in strict mode where we want to emulate
	 * the NET_IP_ALIGN==2 checking.  Therefore use an
	 * unconditional IP align value of '2'.
	 */
	ip_align = 2;

	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"misaligned packet access off %d+%s+%d+%d size %d\n",
			ip_align, tn_buf, reg->off, off, size);
		return -EACCES;
	}

	return 0;
}

static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
				       const struct bpf_reg_state *reg,
				       const char *pointer_desc,
				       int off, int size, bool strict)
{
	struct tnum reg_off;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
			pointer_desc, tn_buf, reg->off, off, size);
		return -EACCES;
	}

	return 0;
}

static int check_ptr_alignment(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int off,
			       int size, bool strict_alignment_once)
{
	bool strict = env->strict_alignment || strict_alignment_once;
	const char *pointer_desc = "";

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		/* Special case, because of NET_IP_ALIGN. Given metadata sits
		 * right in front, treat it the very same way.
		 */
		return check_pkt_ptr_alignment(env, reg, off, size, strict);
	case PTR_TO_FLOW_KEYS:
		pointer_desc = "flow keys ";
		break;
	case PTR_TO_MAP_VALUE:
		pointer_desc = "value ";
		break;
	case PTR_TO_CTX:
		pointer_desc = "context ";
		break;
	case PTR_TO_STACK:
		pointer_desc = "stack ";
		/* The stack spill tracking logic in check_stack_write()
		 * and check_stack_read() relies on stack accesses being
		 * aligned.
		 */
		strict = true;
		break;
	case PTR_TO_SOCKET:
		pointer_desc = "sock ";
		break;
	case PTR_TO_SOCK_COMMON:
		pointer_desc = "sock_common ";
		break;
	case PTR_TO_TCP_SOCK:
		pointer_desc = "tcp_sock ";
		break;
	case PTR_TO_XDP_SOCK:
		pointer_desc = "xdp_sock ";
		break;
	default:
		break;
	}
	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
					   strict);
}

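/* record the deepest stack offset seen so far for the subprogram that
 * 'func' belongs to
 */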
static int update_stack_depth(struct bpf_verifier_env *env,
			      const struct bpf_func_state *func,
			      int off)
{
	u16 stack = env->subprog_info[func->subprogno].stack_depth;

	if (stack >= -off)
		return 0;

	/* update known max for given subprogram */
	env->subprog_info[func->subprogno].stack_depth = -off;
	return 0;
}

/* starting from main bpf function walk all instructions of the function
 * and recursively walk all callees that given function can call.
 * Ignore jump and exit insns.
 * Since recursion is prevented by check_cfg() this algorithm
 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
 */
static int check_max_stack_depth(struct bpf_verifier_env *env)
{
	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	bool tail_call_reachable = false;
	int ret_insn[MAX_CALL_FRAMES];
	int ret_prog[MAX_CALL_FRAMES];
	int j;

process_func:
	/* protect against potential stack overflow that might happen when
	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
	 * depth for such case down to 256 so that the worst case scenario
	 * would result in 8k stack size (32 which is tailcall limit * 256 =
	 * 8k).
	 *
	 * To get the idea what might happen, see an example:
	 * func1 -> sub rsp, 128
	 *  subfunc1 -> sub rsp, 256
	 *  tailcall1 -> add rsp, 256
	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
	 *   subfunc2 -> sub rsp, 64
	 *   subfunc22 -> sub rsp, 128
	 *   tailcall2 -> add rsp, 128
	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
	 *
	 * tailcall will unwind the current stack frame but it will not get rid
	 * of caller's stack as shown on the example above.
	 */
	if (idx && subprog[idx].has_tail_call && depth >= 256) {
		verbose(env,
			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
			depth);
		return -EACCES;
	}
	/* round up to 32-bytes, since this is granularity
	 * of interpreter stack size
	 */
	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
	if (depth > MAX_BPF_STACK) {
		verbose(env, "combined stack size of %d calls is %d. Too large\n",
			frame + 1, depth);
		return -EACCES;
	}
continue_func:
	subprog_end = subprog[idx + 1].start;
	for (; i < subprog_end; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		/* remember insn and function to return to */
		ret_insn[frame] = i + 1;
		ret_prog[frame] = idx;

		/* find the callee */
		i = i + insn[i].imm + 1;
		idx = find_subprog(env, i);
		if (idx < 0) {
			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
				  i);
			return -EFAULT;
		}

		if (subprog[idx].has_tail_call)
			tail_call_reachable = true;

		frame++;
		if (frame >= MAX_CALL_FRAMES) {
			verbose(env, "the call stack of %d frames is too deep !\n",
				frame);
			return -E2BIG;
		}
		goto process_func;
	}
	/* if tail call got detected across bpf2bpf calls then mark each of the
	 * currently present subprog frames as tail call reachable subprogs;
	 * this info will be utilized by JIT so that we will be preserving the
	 * tail call counter throughout bpf2bpf calls combined with tailcalls
	 */
	if (tail_call_reachable)
		for (j = 0; j < frame; j++)
			subprog[ret_prog[j]].tail_call_reachable = true;

	/* end of for() loop means the last insn of the 'subprog'
	 * was reached. Doesn't matter whether it was JA or EXIT
	 */
	if (frame == 0)
		return 0;
	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
	frame--;
	i = ret_insn[frame];
	idx = ret_prog[frame];
	goto continue_func;
}

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
static int get_callee_stack_depth(struct bpf_verifier_env *env,
				  const struct bpf_insn *insn, int idx)
{
	int start = idx + insn->imm + 1, subprog;

	subprog = find_subprog(env, start);
	if (subprog < 0) {
		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
			  start);
		return -EFAULT;
	}
	return env->subprog_info[subprog].stack_depth;
}
#endif

int check_ctx_reg(struct bpf_verifier_env *env,
		  const struct bpf_reg_state *reg, int regno)
{
	/* Access to ctx or passing it to a helper is only allowed in
	 * its original, unmodified form.
	 */

	if (reg->off) {
		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
			regno, reg->off);
		return -EACCES;
	}

	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
		return -EACCES;
	}

	return 0;
}

static int __check_buffer_access(struct bpf_verifier_env *env,
				 const char *buf_info,
				 const struct bpf_reg_state *reg,
				 int regno, int off, int size)
{
	if (off < 0) {
		verbose(env,
			"R%d invalid %s buffer access: off=%d, size=%d\n",
			regno, buf_info, off, size);
		return -EACCES;
	}
	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
			regno, off, tn_buf);
		return -EACCES;
	}

	return 0;
}

static int check_tp_buffer_access(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  int regno, int off, int size)
{
	int err;

	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
	if (err)
		return err;

	if (off + size > env->prog->aux->max_tp_access)
		env->prog->aux->max_tp_access = off + size;

	return 0;
}

static int check_buffer_access(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg,
			       int regno, int off, int size,
			       bool zero_size_allowed,
			       const char *buf_info,
			       u32 *max_access)
{
	int err;

	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
	if (err)
		return err;

	if (off + size > *max_access)
		*max_access = off + size;

	return 0;
}

/* BPF architecture zero extends alu32 ops into 64-bit registers */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_subreg(reg->var_off);
	__reg_assign_32_into_64(reg);
}

/* truncate register to smaller size (in bytes)
 * must be called with size < BPF_REG_SIZE
 */
static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
{
	u64 mask;

	/* clear high bits in bit representation */
	reg->var_off = tnum_cast(reg->var_off, size);

	/* fix arithmetic bounds */
	mask = ((u64)1 << (size * 8)) - 1;
	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
		reg->umin_value &= mask;
		reg->umax_value &= mask;
	} else {
		reg->umin_value = 0;
		reg->umax_value = mask;
	}
	reg->smin_value = reg->umin_value;
	reg->smax_value = reg->umax_value;

	/* If size is smaller than 32bit register the 32bit register
	 * values are also truncated so we push 64-bit bounds into
	 * 32-bit bounds. Above were truncated < 32-bits already.
	 */
	if (size >= 4)
		return;
	__reg_combine_64_into_32(reg);
}

static bool bpf_map_is_rdonly(const struct bpf_map *map)
{
	return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
}

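/* read a constant of 'size' bytes at offset 'off' directly from the value
 * area of a read-only, frozen map at verification time
 */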
static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
{
	void *ptr;
	u64 addr;
	int err;

	err = map->ops->map_direct_value_addr(map, &addr, off);
	if (err)
		return err;
	ptr = (void *)(long)addr + off;

	switch (size) {
	case sizeof(u8):
		*val = (u64)*(u8 *)ptr;
		break;
	case sizeof(u16):
		*val = (u64)*(u16 *)ptr;
		break;
	case sizeof(u32):
		*val = (u64)*(u32 *)ptr;
		break;
	case sizeof(u64):
		*val = *(u64 *)ptr;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

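/* Access through a PTR_TO_BTF_ID register: the offset must be a non-negative
 * constant and the access is validated against the object's BTF type info.
 */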
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
	const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
	u32 btf_id;
	int ret;

	if (off < 0) {
		verbose(env,
			"R%d is ptr_%s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}
	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			regno, tname, off, tn_buf);
		return -EACCES;
	}

	if (env->ops->btf_struct_access) {
		ret = env->ops->btf_struct_access(&env->log, t, off, size,
						  atype, &btf_id);
	} else {
		if (atype != BPF_READ) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		ret = btf_struct_access(&env->log, t, off, size, atype,
					&btf_id);
	}

	if (ret < 0)
		return ret;

	if (atype == BPF_READ && value_regno >= 0)
		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);

	return 0;
}

static int check_ptr_to_map_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	struct bpf_map *map = reg->map_ptr;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;
	int ret;

	if (!btf_vmlinux) {
		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
		return -ENOTSUPP;
	}

	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
		verbose(env, "map_ptr access not supported for map type %d\n",
			map->map_type);
		return -ENOTSUPP;
	}

	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
	tname = btf_name_by_offset(btf_vmlinux, t->name_off);

	if (!env->allow_ptr_to_map_access) {
		verbose(env,
			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}

	if (off < 0) {
		verbose(env, "R%d is %s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (atype != BPF_READ) {
		verbose(env, "only read from %s is supported\n", tname);
		return -EACCES;
	}

	ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
	if (ret < 0)
		return ret;

	if (value_regno >= 0)
		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);

3393 3394 3395
	return 0;
}


/* check whether memory at (regno + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register which value is stored into memory
 * if t==read, value_regno is a register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 */
static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
			    int off, int bpf_size, enum bpf_access_type t,
			    int value_regno, bool strict_alignment_once)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = regs + regno;
	struct bpf_func_state *state;
	int size, err = 0;

	size = bpf_size_to_bytes(bpf_size);
	if (size < 0)
		return size;

	/* alignment checks will add in reg->off themselves */
	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
	if (err)
		return err;

	/* for access checks, reg->off is just part of off */
	off += reg->off;

	if (reg->type == PTR_TO_MAP_VALUE) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into map\n", value_regno);
			return -EACCES;
		}
		err = check_map_access_type(env, regno, off, size, t);
		if (err)
			return err;
		err = check_map_access(env, regno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0) {
			struct bpf_map *map = reg->map_ptr;

			/* if map is read-only, track its contents as scalars */
			if (tnum_is_const(reg->var_off) &&
			    bpf_map_is_rdonly(map) &&
			    map->ops->map_direct_value_addr) {
				int map_off = off + reg->var_off.value;
				u64 val = 0;

				err = bpf_map_direct_read(map, map_off, size,
							  &val);
				if (err)
					return err;

				regs[value_regno].type = SCALAR_VALUE;
				__mark_reg_known(&regs[value_regno], val);
			} else {
				mark_reg_unknown(env, regs, value_regno);
			}
		}
	} else if (reg->type == PTR_TO_MEM) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into mem\n", value_regno);
			return -EACCES;
		}
		err = check_mem_region_access(env, regno, off, size,
					      reg->mem_size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_CTX) {
		enum bpf_reg_type reg_type = SCALAR_VALUE;
		u32 btf_id = 0;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into ctx\n", value_regno);
			return -EACCES;
		}

		err = check_ctx_reg(env, reg, regno);
		if (err < 0)
			return err;

		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
		if (err)
			verbose_linfo(env, insn_idx, "; ");
		if (!err && t == BPF_READ && value_regno >= 0) {
			/* ctx access returns either a scalar, or a
			 * PTR_TO_PACKET[_META,_END]. In the latter
			 * case, we know the offset is zero.
			 */
			if (reg_type == SCALAR_VALUE) {
				mark_reg_unknown(env, regs, value_regno);
			} else {
				mark_reg_known_zero(env, regs,
						    value_regno);
				if (reg_type_may_be_null(reg_type))
					regs[value_regno].id = ++env->id_gen;
				/* A load of ctx field could have different
				 * actual load size with the one encoded in the
				 * insn. When the dst is PTR, it is for sure not
				 * a sub-register.
				 */
				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
				if (reg_type == PTR_TO_BTF_ID ||
				    reg_type == PTR_TO_BTF_ID_OR_NULL)
					regs[value_regno].btf_id = btf_id;
			}
			regs[value_regno].type = reg_type;
		}

	} else if (reg->type == PTR_TO_STACK) {
		off += reg->var_off.value;
		err = check_stack_access(env, reg, off, size);
		if (err)
			return err;

		state = func(env, reg);
		err = update_stack_depth(env, state, off);
		if (err)
			return err;

		if (t == BPF_WRITE)
			err = check_stack_write(env, state, off, size,
						value_regno, insn_idx);
		else
			err = check_stack_read(env, state, off, size,
					       value_regno);
	} else if (reg_is_pkt_pointer(reg)) {
		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
			verbose(env, "cannot write into packet\n");
			return -EACCES;
		}
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into packet\n",
				value_regno);
			return -EACCES;
		}
		err = check_packet_access(env, regno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_FLOW_KEYS) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into flow keys\n",
				value_regno);
			return -EACCES;
		}

		err = check_flow_keys_access(env, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (type_is_sk_pointer(reg->type)) {
		if (t == BPF_WRITE) {
			verbose(env, "R%d cannot write into %s\n",
				regno, reg_type_str[reg->type]);
			return -EACCES;
		}
		err = check_sock_access(env, insn_idx, regno, off, size, t);
		if (!err && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_TP_BUFFER) {
		err = check_tp_buffer_access(env, reg, regno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_BTF_ID) {
		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
					      value_regno);
	} else if (reg->type == CONST_PTR_TO_MAP) {
		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
					      value_regno);
	} else if (reg->type == PTR_TO_RDONLY_BUF) {
		if (t == BPF_WRITE) {
			verbose(env, "R%d cannot write into %s\n",
				regno, reg_type_str[reg->type]);
			return -EACCES;
		}
		err = check_buffer_access(env, reg, regno, off, size, false,
					  "rdonly",
					  &env->prog->aux->max_rdonly_access);
		if (!err && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_RDWR_BUF) {
		err = check_buffer_access(env, reg, regno, off, size, false,
					  "rdwr",
					  &env->prog->aux->max_rdwr_access);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else {
		verbose(env, "R%d invalid mem access '%s'\n", regno,
			reg_type_str[reg->type]);
		return -EACCES;
	}

	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
	    regs[value_regno].type == SCALAR_VALUE) {
		/* b/h/w load zero-extends, mark upper bits as known 0 */
		coerce_reg_to_size(&regs[value_regno], size);
	}
	return err;
}

static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
	int err;

	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
	    insn->imm != 0) {
		verbose(env, "BPF_XADD uses reserved fields\n");
		return -EINVAL;
	}

	/* check src1 operand */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, insn->src_reg)) {
		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
		return -EACCES;
	}

	if (is_ctx_reg(env, insn->dst_reg) ||
	    is_pkt_reg(env, insn->dst_reg) ||
	    is_flow_key_reg(env, insn->dst_reg) ||
	    is_sk_reg(env, insn->dst_reg)) {
		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
			insn->dst_reg,
			reg_type_str[reg_state(env, insn->dst_reg)->type]);
		return -EACCES;
	}

	/* check whether atomic_add can read the memory */
	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, -1, true);
	if (err)
		return err;

	/* check whether atomic_add can write into the same memory */
	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}

static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
				  int off, int access_size,
				  bool zero_size_allowed)
{
	struct bpf_reg_state *reg = reg_state(env, regno);

	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
		if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
				regno, off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
				regno, tn_buf, access_size);
		}
		return -EACCES;
	}
	return 0;
}
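/* Illustrative example (not part of the original file): with MAX_BPF_STACK
 * == 512, an access of access_size == 8 passes only for offsets in
 * [-512, -8]. off == -4 fails because off + access_size > 0, and off == 0
 * fails because stack offsets must be negative.
 */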

/* when register 'regno' is passed into function that will read 'access_size'
 * bytes from that pointer, make sure that it's within stack boundary
 * and all elements of stack are initialized.
 * Unlike most pointer bounds-checking functions, this one doesn't take an
 * 'off' argument, so it has to add in reg->off itself.
 */
static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
				int access_size, bool zero_size_allowed,
				struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_func_state *state = func(env, reg);
	int err, min_off, max_off, i, j, slot, spi;

	if (tnum_is_const(reg->var_off)) {
		min_off = max_off = reg->var_off.value + reg->off;
		err = __check_stack_boundary(env, regno, min_off, access_size,
					     zero_size_allowed);
		if (err)
			return err;
	} else {
		/* Variable offset is prohibited for unprivileged mode for
		 * simplicity since it requires corresponding support in
		 * Spectre masking for stack ALU.
		 * See also retrieve_ptr_limit().
		 */
		if (!env->bypass_spec_v1) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
				regno, tn_buf);
			return -EACCES;
		}
		/* Only an initialized buffer on the stack is allowed to be
		 * accessed with a variable offset. With an uninitialized
		 * buffer it's hard to guarantee that the whole memory is
		 * marked as initialized on helper return, since the specific
		 * bounds are unknown, which may cause an uninitialized stack
		 * leak.
		 */
		if (meta && meta->raw_mode)
			meta = NULL;

		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
		    reg->smax_value <= -BPF_MAX_VAR_OFF) {
			verbose(env, "R%d unbounded indirect variable offset stack access\n",
				regno);
			return -EACCES;
		}
		min_off = reg->smin_value + reg->off;
		max_off = reg->smax_value + reg->off;
		err = __check_stack_boundary(env, regno, min_off, access_size,
					     zero_size_allowed);
		if (err) {
			verbose(env, "R%d min value is outside of stack bound\n",
				regno);
			return err;
		}
		err = __check_stack_boundary(env, regno, max_off, access_size,
					     zero_size_allowed);
		if (err) {
			verbose(env, "R%d max value is outside of stack bound\n",
				regno);
			return err;
		}
	}

	if (meta && meta->raw_mode) {
		meta->access_size = access_size;
		meta->regno = regno;
		return 0;
	}

	for (i = min_off; i < max_off + access_size; i++) {
		u8 *stype;

		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot)
			goto err;
		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		if (*stype == STACK_MISC)
			goto mark;
		if (*stype == STACK_ZERO) {
			/* helper can write anything into the stack */
			*stype = STACK_MISC;
			goto mark;
		}

		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
			goto mark;

		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
			__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
			for (j = 0; j < BPF_REG_SIZE; j++)
				state->stack[spi].slot_type[j] = STACK_MISC;
			goto mark;
		}

err:
		if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
				min_off, i - min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
				tn_buf, i - min_off, access_size);
		}
		return -EACCES;
mark:
		/* reading any byte out of 8-byte 'spill_slot' will cause
		 * the whole slot to be marked as 'read'
		 */
		mark_reg_read(env, &state->stack[spi].spilled_ptr,
			      state->stack[spi].spilled_ptr.parent,
			      REG_LIVE_READ64);
	}
	return update_stack_depth(env, state, min_off);
}

static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
				   int access_size, bool zero_size_allowed,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		return check_packet_access(env, regno, reg->off, access_size,
					   zero_size_allowed);
	case PTR_TO_MAP_VALUE:
		if (check_map_access_type(env, regno, reg->off, access_size,
					  meta && meta->raw_mode ? BPF_WRITE :
					  BPF_READ))
			return -EACCES;
		return check_map_access(env, regno, reg->off, access_size,
					zero_size_allowed);
	case PTR_TO_MEM:
		return check_mem_region_access(env, regno, reg->off,
					       access_size, reg->mem_size,
					       zero_size_allowed);
	case PTR_TO_RDONLY_BUF:
		if (meta && meta->raw_mode)
			return -EACCES;
		return check_buffer_access(env, reg, regno, reg->off,
					   access_size, zero_size_allowed,
					   "rdonly",
					   &env->prog->aux->max_rdonly_access);
	case PTR_TO_RDWR_BUF:
		return check_buffer_access(env, reg, regno, reg->off,
					   access_size, zero_size_allowed,
					   "rdwr",
					   &env->prog->aux->max_rdwr_access);
	case PTR_TO_STACK:
		return check_stack_boundary(env, regno, access_size,
					    zero_size_allowed, meta);
	default: /* scalar_value or invalid ptr */
		/* Allow zero-byte read from NULL, regardless of pointer type */
		if (zero_size_allowed && access_size == 0 &&
		    register_is_null(reg))
			return 0;

		verbose(env, "R%d type=%s expected=%s\n", regno,
			reg_type_str[reg->type],
			reg_type_str[PTR_TO_STACK]);
		return -EACCES;
	}
}

/* Implementation details:
 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
 * Two bpf_map_lookups (even with the same key) will have different reg->id.
 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
 * value_or_null->value transition, since the verifier only cares about
 * the range of access to valid map value pointer and doesn't care about actual
 * address of the map element.
 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
 * reg->id > 0 after value_or_null->value transition. By doing so
 * two bpf_map_lookups will be considered two different pointers that
 * point to different bpf_spin_locks.
 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 * dead-locks.
 * Since only one bpf_spin_lock is allowed the checks are simpler than
 * reg_is_refcounted() logic. The verifier needs to remember only
 * one spin_lock instead of array of acquired_refs.
 * cur_state->active_spin_lock remembers which map value element got locked
 * and clears it after bpf_spin_unlock.
 */
static int process_spin_lock(struct bpf_verifier_env *env, int regno,
			     bool is_lock)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	struct bpf_verifier_state *cur = env->cur_state;
	bool is_const = tnum_is_const(reg->var_off);
	struct bpf_map *map = reg->map_ptr;
	u64 val = reg->var_off.value;

	if (!is_const) {
		verbose(env,
			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
			regno);
		return -EINVAL;
	}
	if (!map->btf) {
		verbose(env,
			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
			map->name);
		return -EINVAL;
	}
	if (!map_value_has_spin_lock(map)) {
		if (map->spin_lock_off == -E2BIG)
			verbose(env,
				"map '%s' has more than one 'struct bpf_spin_lock'\n",
				map->name);
		else if (map->spin_lock_off == -ENOENT)
			verbose(env,
				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
				map->name);
		else
			verbose(env,
				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
				map->name);
		return -EINVAL;
	}
	if (map->spin_lock_off != val + reg->off) {
		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
			val + reg->off);
		return -EINVAL;
	}
	if (is_lock) {
		if (cur->active_spin_lock) {
			verbose(env,
				"Locking two bpf_spin_locks are not allowed\n");
			return -EINVAL;
		}
		cur->active_spin_lock = reg->id;
	} else {
		if (!cur->active_spin_lock) {
			verbose(env, "bpf_spin_unlock without taking a lock\n");
			return -EINVAL;
		}
		if (cur->active_spin_lock != reg->id) {
			verbose(env, "bpf_spin_unlock of different lock\n");
			return -EINVAL;
		}
		cur->active_spin_lock = 0;
	}
	return 0;
}
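/* Usage sketch (illustrative, not from this file): a program taking the lock
 * embedded in its map value is expected to follow the pattern
 *
 *	value = bpf_map_lookup_elem(&map, &key);
 *	if (!value)
 *		return 0;
 *	bpf_spin_lock(&value->lock);
 *	value->counter++;
 *	bpf_spin_unlock(&value->lock);
 *
 * process_spin_lock() ties the lock/unlock pair to the same reg->id, so
 * unlocking a different map value than the one that was locked is rejected.
 */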

static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_MEM ||
	       type == ARG_PTR_TO_MEM_OR_NULL ||
	       type == ARG_PTR_TO_UNINIT_MEM;
}

static bool arg_type_is_mem_size(enum bpf_arg_type type)
{
	return type == ARG_CONST_SIZE ||
	       type == ARG_CONST_SIZE_OR_ZERO;
}

static bool arg_type_is_alloc_size(enum bpf_arg_type type)
{
	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
}

static bool arg_type_is_int_ptr(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_INT ||
	       type == ARG_PTR_TO_LONG;
}

static int int_ptr_type_to_size(enum bpf_arg_type type)
{
	if (type == ARG_PTR_TO_INT)
		return sizeof(u32);
	else if (type == ARG_PTR_TO_LONG)
		return sizeof(u64);

	return -EINVAL;
}

static int resolve_map_arg_type(struct bpf_verifier_env *env,
				 const struct bpf_call_arg_meta *meta,
				 enum bpf_arg_type *arg_type)
{
	if (!meta->map_ptr) {
		/* kernel subsystem misconfigured verifier */
		verbose(env, "invalid map_ptr to access map->type\n");
		return -EACCES;
	}

	switch (meta->map_ptr->map_type) {
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
		} else {
			verbose(env, "invalid arg_type for sockmap/sockhash\n");
			return -EINVAL;
		}
		break;

	default:
		break;
	}
	return 0;
}

struct bpf_reg_types {
	const enum bpf_reg_type types[10];
	u32 *btf_id;
};

static const struct bpf_reg_types map_key_value_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_VALUE,
	},
};

static const struct bpf_reg_types sock_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
	},
};

#ifdef CONFIG_NET
static const struct bpf_reg_types btf_id_sock_common_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID,
	},
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
#endif

static const struct bpf_reg_types mem_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_VALUE,
		PTR_TO_MEM,
		PTR_TO_RDONLY_BUF,
		PTR_TO_RDWR_BUF,
	},
};

static const struct bpf_reg_types int_ptr_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_VALUE,
	},
};

static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };

static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
	[ARG_PTR_TO_MAP_VALUE]		= &map_key_value_types,
	[ARG_PTR_TO_UNINIT_MAP_VALUE]	= &map_key_value_types,
	[ARG_PTR_TO_MAP_VALUE_OR_NULL]	= &map_key_value_types,
	[ARG_CONST_SIZE]		= &scalar_types,
	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
	[ARG_PTR_TO_CTX]		= &context_types,
	[ARG_PTR_TO_CTX_OR_NULL]	= &context_types,
	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
#ifdef CONFIG_NET
	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
#endif
	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
	[ARG_PTR_TO_SOCKET_OR_NULL]	= &fullsock_types,
	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
	[ARG_PTR_TO_MEM]		= &mem_types,
	[ARG_PTR_TO_MEM_OR_NULL]	= &mem_types,
	[ARG_PTR_TO_UNINIT_MEM]		= &mem_types,
	[ARG_PTR_TO_ALLOC_MEM]		= &alloc_mem_types,
	[ARG_PTR_TO_ALLOC_MEM_OR_NULL]	= &alloc_mem_types,
	[ARG_PTR_TO_INT]		= &int_ptr_types,
	[ARG_PTR_TO_LONG]		= &int_ptr_types,
	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
};

static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
			  enum bpf_arg_type arg_type,
			  const u32 *arg_btf_id)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	enum bpf_reg_type expected, type = reg->type;
	const struct bpf_reg_types *compatible;
	int i, j;

	compatible = compatible_reg_types[arg_type];
	if (!compatible) {
		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
		return -EFAULT;
	}

	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
		expected = compatible->types[i];
		if (expected == NOT_INIT)
			break;

		if (type == expected)
			goto found;
	}

	verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
	for (j = 0; j + 1 < i; j++)
		verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
	verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
	return -EACCES;

found:
	if (type == PTR_TO_BTF_ID) {
		if (!arg_btf_id) {
			if (!compatible->btf_id) {
				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
				return -EFAULT;
			}
			arg_btf_id = compatible->btf_id;
		}

		if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
					  *arg_btf_id)) {
			verbose(env, "R%d is of type %s but %s is expected\n",
				regno, kernel_type_name(reg->btf_id),
				kernel_type_name(*arg_btf_id));
			return -EACCES;
		}

		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
			verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
				regno);
			return -EACCES;
		}
	}

	return 0;
}

static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
			  struct bpf_call_arg_meta *meta,
			  const struct bpf_func_proto *fn)
{
	u32 regno = BPF_REG_1 + arg;
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	enum bpf_arg_type arg_type = fn->arg_type[arg];
	enum bpf_reg_type type = reg->type;
	int err = 0;

	if (arg_type == ARG_DONTCARE)
		return 0;

	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	if (arg_type == ARG_ANYTHING) {
		if (is_pointer_value(env, regno)) {
			verbose(env, "R%d leaks addr into helper function\n",
				regno);
			return -EACCES;
		}
		return 0;
	}

	if (type_is_pkt_pointer(type) &&
	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
		verbose(env, "helper access to the packet is not allowed\n");
		return -EACCES;
	}

	if (arg_type == ARG_PTR_TO_MAP_VALUE ||
	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
		err = resolve_map_arg_type(env, meta, &arg_type);
		if (err)
			return err;
	}

	if (register_is_null(reg) && arg_type_may_be_null(arg_type))
		/* A NULL register has a SCALAR_VALUE type, so skip
		 * type checking.
		 */
		goto skip_type_check;

	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
	if (err)
		return err;

	if (type == PTR_TO_CTX) {
		err = check_ctx_reg(env, reg, regno);
		if (err < 0)
			return err;
	}

skip_type_check:
	if (reg->ref_obj_id) {
		if (meta->ref_obj_id) {
			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
				regno, reg->ref_obj_id,
				meta->ref_obj_id);
			return -EFAULT;
		}
		meta->ref_obj_id = reg->ref_obj_id;
	}

	if (arg_type == ARG_CONST_MAP_PTR) {
		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
		meta->map_ptr = reg->map_ptr;
	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
		/* bpf_map_xxx(..., map_ptr, ..., key) call:
		 * check that [key, key + map->key_size) are within
		 * stack limits and initialized
		 */
		if (!meta->map_ptr) {
			/* in function declaration map_ptr must come before
			 * map_key, so that it's verified and known before
			 * we have to check map_key here. Otherwise it means
			 * that kernel subsystem misconfigured verifier
			 */
			verbose(env, "invalid map_ptr to access map->key\n");
			return -EACCES;
		}
		err = check_helper_mem_access(env, regno,
					      meta->map_ptr->key_size, false,
					      NULL);
	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
		   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
		    !register_is_null(reg)) ||
		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
		/* bpf_map_xxx(..., map_ptr, ..., value) call:
		 * check [value, value + map->value_size) validity
		 */
		if (!meta->map_ptr) {
			/* kernel subsystem misconfigured verifier */
			verbose(env, "invalid map_ptr to access map->value\n");
			return -EACCES;
		}
		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
		err = check_helper_mem_access(env, regno,
					      meta->map_ptr->value_size, false,
					      meta);
	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
		if (!reg->btf_id) {
			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
			return -EACCES;
		}
		meta->ret_btf_id = reg->btf_id;
	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
		if (meta->func_id == BPF_FUNC_spin_lock) {
			if (process_spin_lock(env, regno, true))
				return -EACCES;
		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
			if (process_spin_lock(env, regno, false))
				return -EACCES;
		} else {
			verbose(env, "verifier internal error\n");
			return -EFAULT;
		}
	} else if (arg_type_is_mem_ptr(arg_type)) {
		/* The access to this pointer is only checked when we hit the
		 * next is_mem_size argument below.
		 */
		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
	} else if (arg_type_is_mem_size(arg_type)) {
		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);

		/* This is used to refine r0 return value bounds for helpers
		 * that enforce this value as an upper bound on return values.
		 * See do_refine_retval_range() for helpers that can refine
		 * the return value. The C type of the helper is u32, so we
		 * pull the register bound from umax_value; if it is negative,
		 * the verifier errors out. Only upper bounds can be learned
		 * because the retval is an int type and negative retvals are
		 * allowed.
		 */
		meta->msize_max_value = reg->umax_value;

		/* The register is SCALAR_VALUE; the access check
		 * happens using its boundaries.
		 */
		if (!tnum_is_const(reg->var_off))
			/* For unprivileged variable accesses, disable raw
			 * mode so that the program is required to
			 * initialize all the memory that the helper could
			 * just partially fill up.
			 */
			meta = NULL;

		if (reg->smin_value < 0) {
			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
				regno);
			return -EACCES;
		}

		if (reg->umin_value == 0) {
			err = check_helper_mem_access(env, regno - 1, 0,
						      zero_size_allowed,
						      meta);
			if (err)
				return err;
		}

		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
				regno);
			return -EACCES;
		}
		err = check_helper_mem_access(env, regno - 1,
					      reg->umax_value,
					      zero_size_allowed, meta);
		if (!err)
			err = mark_chain_precision(env, regno);
	} else if (arg_type_is_alloc_size(arg_type)) {
		if (!tnum_is_const(reg->var_off)) {
			verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
				regno);
			return -EACCES;
		}
		meta->mem_size = reg->var_off.value;
	} else if (arg_type_is_int_ptr(arg_type)) {
		int size = int_ptr_type_to_size(arg_type);

		err = check_helper_mem_access(env, regno, size, false, meta);
		if (err)
			return err;
		err = check_ptr_alignment(env, reg, 0, size, true);
	}

	return err;
}

static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
{
	enum bpf_attach_type eatype = env->prog->expected_attach_type;
	enum bpf_prog_type type = resolve_prog_type(env->prog);

	if (func_id != BPF_FUNC_map_update_elem)
		return false;

	/* It's not possible to get access to a locked struct sock in these
	 * contexts, so updating is safe.
	 */
	switch (type) {
	case BPF_PROG_TYPE_TRACING:
		if (eatype == BPF_TRACE_ITER)
			return true;
		break;
	case BPF_PROG_TYPE_SOCKET_FILTER:
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		return true;
	default:
		break;
	}

	verbose(env, "cannot update sockmap in this context\n");
	return false;
}

static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
{
	return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
}

static int check_map_func_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map, int func_id)
{
	if (!map)
		return 0;

	/* We need a two way check, first is from map perspective ... */
	switch (map->map_type) {
	case BPF_MAP_TYPE_PROG_ARRAY:
		if (func_id != BPF_FUNC_tail_call)
			goto error;
		break;
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
		if (func_id != BPF_FUNC_perf_event_read &&
		    func_id != BPF_FUNC_perf_event_output &&
		    func_id != BPF_FUNC_skb_output &&
		    func_id != BPF_FUNC_perf_event_read_value &&
		    func_id != BPF_FUNC_xdp_output)
			goto error;
		break;
	case BPF_MAP_TYPE_RINGBUF:
		if (func_id != BPF_FUNC_ringbuf_output &&
		    func_id != BPF_FUNC_ringbuf_reserve &&
		    func_id != BPF_FUNC_ringbuf_submit &&
		    func_id != BPF_FUNC_ringbuf_discard &&
		    func_id != BPF_FUNC_ringbuf_query)
			goto error;
		break;
	case BPF_MAP_TYPE_STACK_TRACE:
		if (func_id != BPF_FUNC_get_stackid)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_ARRAY:
		if (func_id != BPF_FUNC_skb_under_cgroup &&
		    func_id != BPF_FUNC_current_task_under_cgroup)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_STORAGE:
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		if (func_id != BPF_FUNC_get_local_storage)
			goto error;
		break;
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
		if (func_id != BPF_FUNC_redirect_map &&
		    func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	/* Restrict bpf side of cpumap and xskmap, open when use-cases
	 * appear.
	 */
	case BPF_MAP_TYPE_CPUMAP:
		if (func_id != BPF_FUNC_redirect_map)
			goto error;
		break;
	case BPF_MAP_TYPE_XSKMAP:
		if (func_id != BPF_FUNC_redirect_map &&
		    func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		if (func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
		if (func_id != BPF_FUNC_sk_redirect_map &&
		    func_id != BPF_FUNC_sock_map_update &&
		    func_id != BPF_FUNC_map_delete_elem &&
		    func_id != BPF_FUNC_msg_redirect_map &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKHASH:
		if (func_id != BPF_FUNC_sk_redirect_hash &&
		    func_id != BPF_FUNC_sock_hash_update &&
		    func_id != BPF_FUNC_map_delete_elem &&
		    func_id != BPF_FUNC_msg_redirect_hash &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
		if (func_id != BPF_FUNC_sk_select_reuseport)
			goto error;
		break;
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
		if (func_id != BPF_FUNC_map_peek_elem &&
		    func_id != BPF_FUNC_map_pop_elem &&
		    func_id != BPF_FUNC_map_push_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SK_STORAGE:
		if (func_id != BPF_FUNC_sk_storage_get &&
		    func_id != BPF_FUNC_sk_storage_delete)
			goto error;
		break;
	case BPF_MAP_TYPE_INODE_STORAGE:
		if (func_id != BPF_FUNC_inode_storage_get &&
		    func_id != BPF_FUNC_inode_storage_delete)
			goto error;
		break;
	default:
		break;
	}

	/* ... and second from the function itself. */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			goto error;
		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_perf_event_read:
	case BPF_FUNC_perf_event_output:
	case BPF_FUNC_perf_event_read_value:
	case BPF_FUNC_skb_output:
	case BPF_FUNC_xdp_output:
		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
			goto error;
		break;
	case BPF_FUNC_get_stackid:
		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
			goto error;
		break;
	case BPF_FUNC_current_task_under_cgroup:
	case BPF_FUNC_skb_under_cgroup:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
			goto error;
		break;
	case BPF_FUNC_redirect_map:
		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
		    map->map_type != BPF_MAP_TYPE_XSKMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_map:
	case BPF_FUNC_msg_redirect_map:
	case BPF_FUNC_sock_map_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_hash:
	case BPF_FUNC_msg_redirect_hash:
	case BPF_FUNC_sock_hash_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
			goto error;
		break;
	case BPF_FUNC_get_local_storage:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			goto error;
		break;
	case BPF_FUNC_sk_select_reuseport:
		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
			goto error;
		break;
	case BPF_FUNC_map_peek_elem:
	case BPF_FUNC_map_pop_elem:
	case BPF_FUNC_map_push_elem:
		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
		    map->map_type != BPF_MAP_TYPE_STACK)
			goto error;
		break;
	case BPF_FUNC_sk_storage_get:
	case BPF_FUNC_sk_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
			goto error;
		break;
	case BPF_FUNC_inode_storage_get:
	case BPF_FUNC_inode_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
			goto error;
		break;
	default:
		break;
	}

	return 0;
error:
	verbose(env, "cannot pass map_type %d into func %s#%d\n",
		map->map_type, func_id_name(func_id), func_id);
	return -EINVAL;
}

static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
{
	int count = 0;

	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
		count++;

	/* We only support one arg being in raw mode at the moment,
	 * which is sufficient for the helper functions we have
	 * right now.
	 */
	return count <= 1;
}

static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
				    enum bpf_arg_type arg_next)
{
	return (arg_type_is_mem_ptr(arg_curr) &&
	        !arg_type_is_mem_size(arg_next)) ||
	       (!arg_type_is_mem_ptr(arg_curr) &&
		arg_type_is_mem_size(arg_next));
}

static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
{
	/* bpf_xxx(..., buf, len) call will access 'len'
	 * bytes from memory 'buf'. Both arg types need
	 * to be paired, so make sure there's no buggy
	 * helper function specification.
	 */
	if (arg_type_is_mem_size(fn->arg1_type) ||
	    arg_type_is_mem_ptr(fn->arg5_type)  ||
	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
		return false;

	return true;
}
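/* Illustrative example (not from this file): a proto laid out as
 *
 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
 *
 * (the bpf_probe_read()-style pairing) satisfies check_arg_pair_ok() because
 * the mem pointer in arg1 is immediately followed by its size in arg2; a
 * size argument with no preceding mem pointer would be rejected.
 */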

static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{
	int count = 0;

	if (arg_type_may_be_refcounted(fn->arg1_type))
		count++;
	if (arg_type_may_be_refcounted(fn->arg2_type))
		count++;
	if (arg_type_may_be_refcounted(fn->arg3_type))
		count++;
	if (arg_type_may_be_refcounted(fn->arg4_type))
		count++;
	if (arg_type_may_be_refcounted(fn->arg5_type))
		count++;

	/* A reference acquiring function cannot acquire
	 * another refcounted ptr.
	 */
	if (may_be_acquire_function(func_id) && count)
		return false;

	/* We only support one arg being unreferenced at the moment,
	 * which is sufficient for the helper functions we have right now.
	 */
	return count <= 1;
}

static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
		if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
			return false;

		if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
			return false;
	}

	return true;
}

static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
	return check_raw_mode_ok(fn) &&
	       check_arg_pair_ok(fn) &&
	       check_btf_id_ok(fn) &&
	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}

/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
 * are now invalid, so turn them into unknown SCALAR_VALUE.
 */
static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
				     struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs, *reg;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		if (reg_is_pkt_pointer_any(&regs[i]))
			mark_reg_unknown(env, regs, i);

	bpf_for_each_spilled_reg(i, state, reg) {
		if (!reg)
			continue;
		if (reg_is_pkt_pointer_any(reg))
			__mark_reg_unknown(env, reg);
	}
}

static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	int i;

	for (i = 0; i <= vstate->curframe; i++)
		__clear_all_pkt_pointers(env, vstate->frame[i]);
}

static void release_reg_references(struct bpf_verifier_env *env,
				   struct bpf_func_state *state,
				   int ref_obj_id)
{
	struct bpf_reg_state *regs = state->regs, *reg;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		if (regs[i].ref_obj_id == ref_obj_id)
			mark_reg_unknown(env, regs, i);

	bpf_for_each_spilled_reg(i, state, reg) {
		if (!reg)
			continue;
		if (reg->ref_obj_id == ref_obj_id)
			__mark_reg_unknown(env, reg);
	}
}

/* The pointer with the specified id has released its reference to kernel
 * resources. Identify all copies of the same pointer and clear the reference.
 */
static int release_reference(struct bpf_verifier_env *env,
			     int ref_obj_id)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	int err;
	int i;

	err = release_reference_state(cur_func(env), ref_obj_id);
	if (err)
		return err;

	for (i = 0; i <= vstate->curframe; i++)
		release_reg_references(env, vstate->frame[i], ref_obj_id);

	return 0;
}

static void clear_caller_saved_regs(struct bpf_verifier_env *env,
				    struct bpf_reg_state *regs)
{
	int i;

	/* after the call registers r0 - r5 were scratched */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}
}

static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_func_info_aux *func_info_aux;
	struct bpf_func_state *caller, *callee;
	int i, err, subprog, target_insn;
	bool is_global = false;

	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
		verbose(env, "the call stack of %d frames is too deep\n",
			state->curframe + 2);
		return -E2BIG;
	}

	target_insn = *insn_idx + insn->imm;
	subprog = find_subprog(env, target_insn + 1);
	if (subprog < 0) {
		verbose(env, "verifier bug. No program starts at insn %d\n",
			target_insn + 1);
		return -EFAULT;
	}

	caller = state->frame[state->curframe];
	if (state->frame[state->curframe + 1]) {
		verbose(env, "verifier bug. Frame %d already allocated\n",
			state->curframe + 1);
		return -EFAULT;
	}

	func_info_aux = env->prog->aux->func_info_aux;
	if (func_info_aux)
		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
	err = btf_check_func_arg_match(env, subprog, caller->regs);
	if (err == -EFAULT)
		return err;
	if (is_global) {
		if (err) {
			verbose(env, "Caller passes invalid args into func#%d\n",
				subprog);
			return err;
		} else {
			if (env->log.level & BPF_LOG_LEVEL)
				verbose(env,
					"Func#%d is global and valid. Skipping.\n",
					subprog);
			clear_caller_saved_regs(env, caller->regs);

			/* All global functions return SCALAR_VALUE */
			mark_reg_unknown(env, caller->regs, BPF_REG_0);

			/* continue with next insn after call */
			return 0;
		}
	}

	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
	if (!callee)
		return -ENOMEM;
	state->frame[state->curframe + 1] = callee;

	/* callee cannot access r0, r6 - r9 for reading and has to write
	 * into its own stack before reading from it.
	 * callee can read/write into caller's stack
	 */
	init_func_state(env, callee,
			/* remember the callsite, it will be used by bpf_exit */
			*insn_idx /* callsite */,
			state->curframe + 1 /* frameno within this callchain */,
J
4811

4812 4813 4814 4815 4816
	/* Transfer references to the callee */
	err = transfer_reference_state(callee, caller);
	if (err)
		return err;

4817 4818 4819
	/* copy r1 - r5 args that callee can access.  The copy includes parent
	 * pointers, which connects us up to the liveness chain
	 */
4820 4821 4822
	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
		callee->regs[i] = caller->regs[i];

4823
	clear_caller_saved_regs(env, caller->regs);
4824 4825 4826 4827 4828 4829 4830

	/* only increment it after check_reg_arg() finished */
	state->curframe++;

	/* and go analyze first insn of the callee */
	*insn_idx = target_insn;

4831
	if (env->log.level & BPF_LOG_LEVEL) {
4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844
		verbose(env, "caller:\n");
		print_verifier_state(env, caller);
		verbose(env, "callee:\n");
		print_verifier_state(env, callee);
	}
	return 0;
}

static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_func_state *caller, *callee;
	struct bpf_reg_state *r0;
4845
	int err;
4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864

	callee = state->frame[state->curframe];
	r0 = &callee->regs[BPF_REG_0];
	if (r0->type == PTR_TO_STACK) {
		/* technically it's ok to return caller's stack pointer
		 * (or caller's caller's pointer) back to the caller,
		 * since these pointers are valid. Only current stack
		 * pointer will be invalid as soon as function exits,
		 * but let's be conservative
		 */
		verbose(env, "cannot return stack pointer to the caller\n");
		return -EINVAL;
	}

	state->curframe--;
	caller = state->frame[state->curframe];
	/* return to the caller whatever r0 had in the callee */
	caller->regs[BPF_REG_0] = *r0;

4865 4866 4867 4868 4869
	/* Transfer references to the caller */
	err = transfer_reference_state(caller, callee);
	if (err)
		return err;

	*insn_idx = callee->callsite + 1;
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "returning from callee:\n");
		print_verifier_state(env, callee);
		verbose(env, "to caller at %d:\n", *insn_idx);
		print_verifier_state(env, caller);
	}
	/* clear everything in the callee */
	free_func_state(callee);
	state->frame[state->curframe + 1] = NULL;
	return 0;
}

static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
				   int func_id,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];

	if (ret_type != RET_INTEGER ||
	    (func_id != BPF_FUNC_get_stack &&
4891 4892 4893
	     func_id != BPF_FUNC_probe_read_str &&
	     func_id != BPF_FUNC_probe_read_kernel_str &&
	     func_id != BPF_FUNC_probe_read_user_str))
4894 4895
		return;

4896
	ret_reg->smax_value = meta->msize_max_value;
4897
	ret_reg->s32_max_value = meta->msize_max_value;
4898 4899
	__reg_deduce_bounds(ret_reg);
	__reg_bound_offset(ret_reg);
4900
	__update_reg_bounds(ret_reg);
4901 4902
}

4903 4904 4905 4906 4907
static int
record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
		int func_id, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
4908
	struct bpf_map *map = meta->map_ptr;
4909 4910

	if (func_id != BPF_FUNC_tail_call &&
4911 4912
	    func_id != BPF_FUNC_map_lookup_elem &&
	    func_id != BPF_FUNC_map_update_elem &&
M
	    func_id != BPF_FUNC_map_push_elem &&
	    func_id != BPF_FUNC_map_pop_elem &&
	    func_id != BPF_FUNC_map_peek_elem)
4917
		return 0;
4918

4919
	if (map == NULL) {
4920 4921 4922 4923
		verbose(env, "kernel subsystem misconfigured verifier\n");
		return -EINVAL;
	}

4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936
	/* In case of read-only, some additional restrictions
	 * need to be applied in order to prevent altering the
	 * state of the map from program side.
	 */
	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
	    (func_id == BPF_FUNC_map_delete_elem ||
	     func_id == BPF_FUNC_map_update_elem ||
	     func_id == BPF_FUNC_map_push_elem ||
	     func_id == BPF_FUNC_map_pop_elem)) {
		verbose(env, "write into map forbidden\n");
		return -EACCES;
	}

4937
	if (!BPF_MAP_PTR(aux->map_ptr_state))
4938
		bpf_map_ptr_store(aux, meta->map_ptr,
				  !meta->map_ptr->bypass_spec_v1);
	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
				  !meta->map_ptr->bypass_spec_v1);
	return 0;
}

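/* Track the index argument of bpf_tail_call(). A constant, in-bounds index
 * is recorded so the tail call can later be converted into a direct jump;
 * anything else poisons the stored key.
 */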
static int
record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
		int func_id, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	struct bpf_reg_state *regs = cur_regs(env), *reg;
	struct bpf_map *map = meta->map_ptr;
	struct tnum range;
	u64 val;
	int err;

	if (func_id != BPF_FUNC_tail_call)
		return 0;
	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
		verbose(env, "kernel subsystem misconfigured verifier\n");
		return -EINVAL;
	}

	range = tnum_range(0, map->max_entries - 1);
	reg = &regs[BPF_REG_3];

	if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
		return 0;
	}

	err = mark_chain_precision(env, BPF_REG_3);
	if (err)
		return err;

	val = reg->var_off.value;
	if (bpf_map_key_unseen(aux))
		bpf_map_key_store(aux, val);
	else if (!bpf_map_key_poisoned(aux) &&
		  bpf_map_key_immediate(aux) != val)
		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
	return 0;
}

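/* References acquired by the program (e.g. sockets) must be released before
 * a tail call or program exit; report every reference that is still
 * outstanding at this point.
 */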
static int check_reference_leak(struct bpf_verifier_env *env)
{
	struct bpf_func_state *state = cur_func(env);
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
			state->refs[i].id, state->refs[i].insn_idx);
	}
	return state->acquired_refs ? -EINVAL : 0;
}

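/* Verify a single call to a BPF helper: look up the prototype, check that
 * the helper may be used by this program, validate all five arguments,
 * model the helper's side effects and set up the type and bounds of the
 * value returned in r0.
 */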
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
	const struct bpf_func_proto *fn = NULL;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	bool changes_data;
	int i, err;

	/* find function prototype */
	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	if (env->ops->get_func_proto)
		fn = env->ops->get_func_proto(func_id, env->prog);
	if (!fn) {
		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
		return -EINVAL;
	}

	if (fn->allowed && !fn->allowed(env->prog)) {
		verbose(env, "helper call is not allowed in probe\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(fn->func);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
			func_id_name(func_id), func_id);
		return -EINVAL;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn, func_id);
	if (err) {
		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	meta.func_id = func_id;
	/* check args */
	for (i = 0; i < 5; i++) {
		err = check_func_arg(env, i, &meta, fn);
		if (err)
			return err;
	}

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	err = record_func_key(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false);
		if (err)
			return err;
	}

	if (func_id == BPF_FUNC_tail_call) {
		err = check_reference_leak(env);
		if (err) {
			verbose(env, "tail_call would lead to reference leak\n");
			return err;
		}
	} else if (is_release_function(func_id)) {
		err = release_reference(env, meta.ref_obj_id);
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
				func_id_name(func_id), func_id);
			return err;
		}
	}

	regs = cur_regs(env);

	/* check that flags argument in get_local_storage(map, flags) is 0,
	 * this is required because get_local_storage() can't return an error.
	 */
	if (func_id == BPF_FUNC_get_local_storage &&
	    !register_is_null(&regs[BPF_REG_2])) {
		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
		return -EINVAL;
	}

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* helper call returns 64-bit value. */
	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;

	/* update return register (already marked as written above) */
	if (fn->ret_type == RET_INTEGER) {
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
	} else if (fn->ret_type == RET_VOID) {
		regs[BPF_REG_0].type = NOT_INIT;
	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map_ptr == NULL) {
			verbose(env,
				"kernel subsystem misconfigured verifier\n");
			return -EINVAL;
		}
		regs[BPF_REG_0].map_ptr = meta.map_ptr;
		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
			if (map_value_has_spin_lock(meta.map_ptr))
				regs[BPF_REG_0].id = ++env->id_gen;
		} else {
			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
		}
	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
	} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
		regs[BPF_REG_0].mem_size = meta.mem_size;
	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
		   fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
		const struct btf_type *t;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
		if (!btf_type_is_struct(t)) {
			u32 tsize;
			const struct btf_type *ret;
			const char *tname;

			/* resolve the type size of ksym. */
			ret = btf_resolve_size(btf_vmlinux, t, &tsize);
			if (IS_ERR(ret)) {
				tname = btf_name_by_offset(btf_vmlinux, t->name_off);
				verbose(env, "unable to resolve the size of type '%s': %ld\n",
					tname, PTR_ERR(ret));
				return -EINVAL;
			}
			regs[BPF_REG_0].type =
				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
				PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
			regs[BPF_REG_0].mem_size = tsize;
		} else {
			regs[BPF_REG_0].type =
				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
		}
	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
		int ret_btf_id;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
		ret_btf_id = *fn->ret_btf_id;
		if (ret_btf_id == 0) {
			verbose(env, "invalid return type %d of func %s#%d\n",
				fn->ret_type, func_id_name(func_id), func_id);
			return -EINVAL;
		}
		regs[BPF_REG_0].btf_id = ret_btf_id;
	} else {
		verbose(env, "unknown return type %d of func %s#%d\n",
			fn->ret_type, func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (reg_type_may_be_null(regs[BPF_REG_0].type))
		regs[BPF_REG_0].id = ++env->id_gen;

	if (is_ptr_cast_function(func_id)) {
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
	} else if (is_acquire_function(func_id, meta.map_ptr)) {
		int id = acquire_reference_state(env, insn_idx);

		if (id < 0)
			return id;
		/* For mark_ptr_or_null_reg() */
		regs[BPF_REG_0].id = id;
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = id;
	}

	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);

	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
	if (err)
		return err;

	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
		err = get_callchain_buffers(sysctl_perf_event_max_stack);
		err_str = "cannot get callchain buffer for func %s#%d\n";
#else
		err = -ENOTSUPP;
		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
		if (err) {
			verbose(env, err_str, func_id_name(func_id), func_id);
			return err;
		}

		env->prog->has_callchain_buf = true;
	}

	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
		env->prog->call_get_stack = true;

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}

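/* The overflow helpers below mirror the wrap-around behaviour of 64-bit and
 * 32-bit ALU instructions so that tracked min/max bounds can be invalidated
 * whenever a computation may wrap at runtime.
 */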
static bool signed_add_overflows(s64 a, s64 b)
{
	/* Do the add in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a + (u64)b);

	if (b < 0)
		return res > a;
	return res < a;
}

static bool signed_add32_overflows(s32 a, s32 b)
{
	/* Do the add in u32, where overflow is well-defined */
	s32 res = (s32)((u32)a + (u32)b);

	if (b < 0)
		return res > a;
	return res < a;
}

static bool signed_sub_overflows(s64 a, s64 b)
{
	/* Do the sub in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a - (u64)b);

	if (b < 0)
		return res < a;
	return res > a;
}

static bool signed_sub32_overflows(s32 a, s32 b)
{
	/* Do the sub in u32, where overflow is well-defined */
	s32 res = (s32)((u32)a - (u32)b);

	if (b < 0)
		return res < a;
	return res > a;
}

static bool check_reg_sane_offset(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  enum bpf_reg_type type)
{
	bool known = tnum_is_const(reg->var_off);
	s64 val = reg->var_off.value;
	s64 smin = reg->smin_value;

	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
		verbose(env, "math between %s pointer and %lld is not allowed\n",
			reg_type_str[type], val);
		return false;
	}

	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
		verbose(env, "%s pointer offset %d is not allowed\n",
			reg_type_str[type], reg->off);
		return false;
	}

	if (smin == S64_MIN) {
		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
			reg_type_str[type]);
		return false;
	}

	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
		verbose(env, "value %lld makes %s pointer be out of bounds\n",
			smin, reg_type_str[type]);
		return false;
	}

	return true;
}

static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
{
	return &env->insn_aux_data[env->insn_idx];
}

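/* Compute how many bytes the scalar offset of a pointer ADD/SUB may be
 * masked to under the Spectre v1 mitigation, based on the pointer type and
 * its known bounds.
 */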
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
{
	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
			    (opcode == BPF_SUB && !off_is_neg);
	u32 off;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		/* Indirect variable offset stack access is prohibited in
		 * unprivileged mode so it's not handled here.
		 */
		off = ptr_reg->off + ptr_reg->var_off.value;
		if (mask_to_left)
			*ptr_limit = MAX_BPF_STACK + off;
		else
			*ptr_limit = -off;
		return 0;
	case PTR_TO_MAP_VALUE:
		if (mask_to_left) {
			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
		} else {
			off = ptr_reg->smin_value + ptr_reg->off;
			*ptr_limit = ptr_reg->map_ptr->value_size - off;
		}
		return 0;
	default:
		return -EINVAL;
	}
}

static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
				    const struct bpf_insn *insn)
{
	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
}

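/* Record the ALU sanitation mode and masking limit for this instruction so
 * that fixup_bpf_calls() can patch in the masking sequence. Conflicting
 * requirements from different paths cannot be patched and are rejected.
 */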
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	/* If we arrived here from different branches with different
	 * state or limits to sanitize, then this won't work.
	 */
	if (aux->alu_state &&
	    (aux->alu_state != alu_state ||
	     aux->alu_limit != alu_limit))
		return -EACCES;

	/* Corresponding fixup done in fixup_bpf_calls(). */
	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}

static int sanitize_val_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
}

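/* Prepare masking of the scalar offset used in a pointer ADD/SUB under the
 * Spectre v1 mitigation and push a speculative verifier state so that the
 * worst-case (truncated) offset is explored as well.
 */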
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    struct bpf_reg_state *dst_reg,
			    bool off_is_neg)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_insn_aux_data *aux = cur_aux(env);
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	bool ret;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
	alu_state |= ptr_is_dst_reg ?
		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
		return 0;
	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
		return -EACCES;
do_sim:
	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		*dst_reg = *ptr_reg;
	}
	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
	if (!ptr_is_dst_reg && ret)
		*dst_reg = tmp;
	return !ret ? -EFAULT : 0;
}

/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
 * Caller should also handle BPF_MOV case separately.
 * If we return -EACCES, caller may want to try again treating pointer as a
 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
 */
static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn,
				   const struct bpf_reg_state *ptr_reg,
				   const struct bpf_reg_state *off_reg)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg;
	bool known = tnum_is_const(off_reg->var_off);
	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
	u32 dst = insn->dst_reg, src = insn->src_reg;
	u8 opcode = BPF_OP(insn->code);
	int ret;

	dst_reg = &regs[dst];

	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
	    smin_val > smax_val || umin_val > umax_val) {
		/* Taint dst register if offset had invalid bounds derived from
		 * e.g. dead branches.
		 */
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (BPF_CLASS(insn->code) != BPF_ALU64) {
		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}

		verbose(env,
			"R%d 32-bit pointer arithmetic prohibited\n",
			dst);
		return -EACCES;
	}

	switch (ptr_reg->type) {
	case PTR_TO_MAP_VALUE_OR_NULL:
		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
			dst, reg_type_str[ptr_reg->type]);
		return -EACCES;
	case CONST_PTR_TO_MAP:
		/* smin_val represents the known value */
		if (known && smin_val == 0 && opcode == BPF_ADD)
			break;
		fallthrough;
	case PTR_TO_PACKET_END:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
	case PTR_TO_XDP_SOCK:
		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
			dst, reg_type_str[ptr_reg->type]);
		return -EACCES;
	case PTR_TO_MAP_VALUE:
		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
				off_reg == dst_reg ? dst : src);
			return -EACCES;
		}
		fallthrough;
	default:
		break;
	}

	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
	 * The id may be overwritten later if we create a new variable offset.
	 */
	dst_reg->type = ptr_reg->type;
	dst_reg->id = ptr_reg->id;

	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
		return -EINVAL;

	/* pointer types do not carry 32-bit bounds at the moment. */
	__mark_reg32_unbounded(dst_reg);

	switch (opcode) {
	case BPF_ADD:
		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
		if (ret < 0) {
			verbose(env, "R%d tried to add from different maps or paths\n", dst);
			return ret;
		}
		/* We can take a fixed offset as long as it doesn't overflow
		 * the s32 'off' field
		 */
		if (known && (ptr_reg->off + smin_val ==
			      (s64)(s32)(ptr_reg->off + smin_val))) {
			/* pointer += K.  Accumulate it into fixed offset */
			dst_reg->smin_value = smin_ptr;
			dst_reg->smax_value = smax_ptr;
			dst_reg->umin_value = umin_ptr;
			dst_reg->umax_value = umax_ptr;
			dst_reg->var_off = ptr_reg->var_off;
			dst_reg->off = ptr_reg->off + smin_val;
			dst_reg->raw = ptr_reg->raw;
			break;
		}
		/* A new variable offset is created.  Note that off_reg->off
		 * == 0, since it's a scalar.
		 * dst_reg gets the pointer type and since some positive
		 * integer value was added to the pointer, give it a new 'id'
		 * if it's a PTR_TO_PACKET.
		 * this creates a new 'base' pointer, off_reg (variable) gets
		 * added into the variable offset, and we copy the fixed offset
		 * from ptr_reg.
		 */
		if (signed_add_overflows(smin_ptr, smin_val) ||
		    signed_add_overflows(smax_ptr, smax_val)) {
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value = smin_ptr + smin_val;
			dst_reg->smax_value = smax_ptr + smax_val;
		}
		if (umin_ptr + umin_val < umin_ptr ||
		    umax_ptr + umax_val < umax_ptr) {
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			dst_reg->umin_value = umin_ptr + umin_val;
			dst_reg->umax_value = umax_ptr + umax_val;
		}
		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
		dst_reg->off = ptr_reg->off;
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			dst_reg->id = ++env->id_gen;
			/* something was added to pkt_ptr, set range to zero */
			dst_reg->raw = 0;
		}
		break;
	case BPF_SUB:
		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
		if (ret < 0) {
			verbose(env, "R%d tried to sub from different maps or paths\n", dst);
			return ret;
		}
		if (dst_reg == off_reg) {
			/* scalar -= pointer.  Creates an unknown scalar */
			verbose(env, "R%d tried to subtract pointer from scalar\n",
				dst);
			return -EACCES;
		}
		/* We don't allow subtraction from FP, because (according to
		 * test_verifier.c test "invalid fp arithmetic", JITs might not
		 * be able to deal with it.
		 */
		if (ptr_reg->type == PTR_TO_STACK) {
			verbose(env, "R%d subtraction from stack pointer prohibited\n",
				dst);
			return -EACCES;
		}
		if (known && (ptr_reg->off - smin_val ==
			      (s64)(s32)(ptr_reg->off - smin_val))) {
			/* pointer -= K.  Subtract it from fixed offset */
			dst_reg->smin_value = smin_ptr;
			dst_reg->smax_value = smax_ptr;
			dst_reg->umin_value = umin_ptr;
			dst_reg->umax_value = umax_ptr;
			dst_reg->var_off = ptr_reg->var_off;
			dst_reg->id = ptr_reg->id;
			dst_reg->off = ptr_reg->off - smin_val;
			dst_reg->raw = ptr_reg->raw;
			break;
		}
		/* A new variable offset is created.  If the subtrahend is known
		 * nonnegative, then any reg->range we had before is still good.
		 */
		if (signed_sub_overflows(smin_ptr, smax_val) ||
		    signed_sub_overflows(smax_ptr, smin_val)) {
			/* Overflow possible, we know nothing */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value = smin_ptr - smax_val;
			dst_reg->smax_value = smax_ptr - smin_val;
		}
		if (umin_ptr < umax_val) {
			/* Overflow possible, we know nothing */
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			/* Cannot overflow (as long as bounds are consistent) */
			dst_reg->umin_value = umin_ptr - umax_val;
			dst_reg->umax_value = umax_ptr - umin_val;
		}
		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
		dst_reg->off = ptr_reg->off;
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			dst_reg->id = ++env->id_gen;
			/* something was added to pkt_ptr, set range to zero */
			if (smin_val < 0)
				dst_reg->raw = 0;
		}
		break;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* bitwise ops on pointers are troublesome, prohibit. */
		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	default:
		/* other operators (e.g. MUL,LSH) produce non-pointer results */
		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	}

	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
		return -EINVAL;

	__update_reg_bounds(dst_reg);
	__reg_deduce_bounds(dst_reg);
	__reg_bound_offset(dst_reg);

	/* For unprivileged we require that resulting offset must be in bounds
	 * in order to be able to sanitize access later on.
	 */
	if (!env->bypass_spec_v1) {
		if (dst_reg->type == PTR_TO_MAP_VALUE &&
		    check_map_access(env, dst, dst_reg->off, 1, false)) {
			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
				"prohibited for !root\n", dst);
			return -EACCES;
		} else if (dst_reg->type == PTR_TO_STACK &&
			   check_stack_access(env, dst_reg, dst_reg->off +
					      dst_reg->var_off.value, 1)) {
			verbose(env, "R%d stack pointer arithmetic goes out of range, "
				"prohibited for !root\n", dst);
			return -EACCES;
		}
	}

	return 0;
}

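/* The scalar32_min_max_*() and scalar_min_max_*() helpers below update the
 * 32-bit and 64-bit signed/unsigned bounds of dst_reg for a single ALU
 * operation with src_reg. The var_off tnum is updated by the caller,
 * adjust_scalar_min_max_vals(), except for the shift helpers which refine
 * it themselves.
 */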
static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 smin_val = src_reg->s32_min_value;
	s32 smax_val = src_reg->s32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;

	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	} else {
		dst_reg->s32_min_value += smin_val;
		dst_reg->s32_max_value += smax_val;
	}
	if (dst_reg->u32_min_value + umin_val < umin_val ||
	    dst_reg->u32_max_value + umax_val < umax_val) {
		dst_reg->u32_min_value = 0;
		dst_reg->u32_max_value = U32_MAX;
	} else {
		dst_reg->u32_min_value += umin_val;
		dst_reg->u32_max_value += umax_val;
	}
}

static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 smin_val = src_reg->smin_value;
	s64 smax_val = src_reg->smax_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;

	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	} else {
		dst_reg->smin_value += smin_val;
		dst_reg->smax_value += smax_val;
	}
	if (dst_reg->umin_value + umin_val < umin_val ||
	    dst_reg->umax_value + umax_val < umax_val) {
		dst_reg->umin_value = 0;
		dst_reg->umax_value = U64_MAX;
	} else {
		dst_reg->umin_value += umin_val;
		dst_reg->umax_value += umax_val;
	}
}

static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 smin_val = src_reg->s32_min_value;
	s32 smax_val = src_reg->s32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;

	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
		/* Overflow possible, we know nothing */
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	} else {
		dst_reg->s32_min_value -= smax_val;
		dst_reg->s32_max_value -= smin_val;
	}
	if (dst_reg->u32_min_value < umax_val) {
		/* Overflow possible, we know nothing */
		dst_reg->u32_min_value = 0;
		dst_reg->u32_max_value = U32_MAX;
	} else {
		/* Cannot overflow (as long as bounds are consistent) */
		dst_reg->u32_min_value -= umax_val;
		dst_reg->u32_max_value -= umin_val;
	}
}

static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 smin_val = src_reg->smin_value;
	s64 smax_val = src_reg->smax_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;

	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
		/* Overflow possible, we know nothing */
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	} else {
		dst_reg->smin_value -= smax_val;
		dst_reg->smax_value -= smin_val;
	}
	if (dst_reg->umin_value < umax_val) {
		/* Overflow possible, we know nothing */
		dst_reg->umin_value = 0;
		dst_reg->umax_value = U64_MAX;
	} else {
		/* Cannot overflow (as long as bounds are consistent) */
		dst_reg->umin_value -= umax_val;
		dst_reg->umax_value -= umin_val;
	}
}

static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 smin_val = src_reg->s32_min_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;

	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
		/* Ain't nobody got time to multiply that sign */
		__mark_reg32_unbounded(dst_reg);
		return;
	}
	/* Both values are positive, so we can work with unsigned and
	 * copy the result to signed (unless it exceeds S32_MAX).
	 */
	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
		/* Potential overflow, we know nothing */
		__mark_reg32_unbounded(dst_reg);
		return;
	}
	dst_reg->u32_min_value *= umin_val;
	dst_reg->u32_max_value *= umax_val;
	if (dst_reg->u32_max_value > S32_MAX) {
		/* Overflow possible, we know nothing */
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	} else {
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	}
}

static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 smin_val = src_reg->smin_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;

	if (smin_val < 0 || dst_reg->smin_value < 0) {
		/* Ain't nobody got time to multiply that sign */
		__mark_reg64_unbounded(dst_reg);
		return;
	}
	/* Both values are positive, so we can work with unsigned and
	 * copy the result to signed (unless it exceeds S64_MAX).
	 */
	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
		/* Potential overflow, we know nothing */
		__mark_reg64_unbounded(dst_reg);
		return;
	}
	dst_reg->umin_value *= umin_val;
	dst_reg->umax_value *= umax_val;
	if (dst_reg->umax_value > S64_MAX) {
		/* Overflow possible, we know nothing */
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	} else {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	}
}

static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
	s32 smin_val = src_reg->s32_min_value;
	u32 umax_val = src_reg->u32_max_value;

	/* Assuming scalar_min_max_and will be called so it's safe
	 * to skip updating register for known 32-bit case.
	 */
	if (src_known && dst_known)
		return;

	/* We get our minimum from the var_off, since that's inherently
	 * bitwise.  Our maximum is the minimum of the operands' maxima.
	 */
	dst_reg->u32_min_value = var32_off.value;
	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
		/* Lose signed bounds when ANDing negative numbers,
		 * ain't nobody got time for that.
		 */
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	} else {
		/* ANDing two positives gives a positive, so safe to
		 * cast result into s32.
		 */
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	}

}

static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	s64 smin_val = src_reg->smin_value;
	u64 umax_val = src_reg->umax_value;

	if (src_known && dst_known) {
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get our minimum from the var_off, since that's inherently
	 * bitwise.  Our maximum is the minimum of the operands' maxima.
	 */
	dst_reg->umin_value = dst_reg->var_off.value;
	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
	if (dst_reg->smin_value < 0 || smin_val < 0) {
		/* Lose signed bounds when ANDing negative numbers,
		 * ain't nobody got time for that.
		 */
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	} else {
		/* ANDing two positives gives a positive, so safe to
		 * cast result into s64.
		 */
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	}
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}

static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
	s32 smin_val = src_reg->s32_min_value;
	u32 umin_val = src_reg->u32_min_value;

	/* Assuming scalar_min_max_or will be called so it is safe
	 * to skip updating register for known case.
	 */
	if (src_known && dst_known)
		return;

	/* We get our maximum from the var_off, and our minimum is the
	 * maximum of the operands' minima
	 */
	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
		/* Lose signed bounds when ORing negative numbers,
		 * ain't nobody got time for that.
		 */
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	} else {
		/* ORing two positives gives a positive, so safe to
		 * cast result into s32.
		 */
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	}
}

static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
			      struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	s64 smin_val = src_reg->smin_value;
	u64 umin_val = src_reg->umin_value;

	if (src_known && dst_known) {
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get our maximum from the var_off, and our minimum is the
	 * maximum of the operands' minima
	 */
	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
	if (dst_reg->smin_value < 0 || smin_val < 0) {
		/* Lose signed bounds when ORing negative numbers,
		 * ain't nobody got time for that.
		 */
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	} else {
		/* ORing two positives gives a positive, so safe to
		 * cast result into s64.
		 */
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	}
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}

static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
	s32 smin_val = src_reg->s32_min_value;

	/* Assuming scalar_min_max_xor will be called so it is safe
	 * to skip updating register for known case.
	 */
	if (src_known && dst_known)
		return;

	/* We get both minimum and maximum from the var32_off. */
	dst_reg->u32_min_value = var32_off.value;
	dst_reg->u32_max_value = var32_off.value | var32_off.mask;

	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
		/* XORing two positive sign numbers gives a positive,
		 * so safe to cast u32 result into s32.
		 */
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	} else {
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	}
}

static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	s64 smin_val = src_reg->smin_value;

	if (src_known && dst_known) {
		/* dst_reg->var_off.value has been updated earlier */
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get both minimum and maximum from the var_off. */
	dst_reg->umin_value = dst_reg->var_off.value;
	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;

	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
		/* XORing two positive sign numbers gives a positive,
		 * so safe to cast u64 result into s64.
		 */
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}

	__update_reg_bounds(dst_reg);
}

static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	/* We lose all sign bit information (except what we can pick
	 * up from var_off)
	 */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;
	/* If we might shift our top bit out, then we know nothing */
	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
		dst_reg->u32_min_value = 0;
		dst_reg->u32_max_value = U32_MAX;
	} else {
		dst_reg->u32_min_value <<= umin_val;
		dst_reg->u32_max_value <<= umax_val;
	}
}

static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	u32 umax_val = src_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	/* u32 alu operation will zext upper bits */
	struct tnum subreg = tnum_subreg(dst_reg->var_off);

	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
	/* Not required but being careful mark reg64 bounds as unknown so
	 * that we are forced to pick them up from tnum and zext later and
	 * if some path skips this step we are still safe.
	 */
	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}

static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	/* Special case <<32 because it is a common compiler pattern to sign
	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
	 * positive we know this shift will also be positive so we can track
	 * bounds correctly. Otherwise we lose all sign bit information except
	 * what we can pick up from var_off. Perhaps we can generalize this
	 * later to shifts of any length.
	 */
	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
	else
		dst_reg->smax_value = S64_MAX;

	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
	else
		dst_reg->smin_value = S64_MIN;

	/* If we might shift our top bit out, then we know nothing */
	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
		dst_reg->umin_value = 0;
		dst_reg->umax_value = U64_MAX;
	} else {
		dst_reg->umin_value <<= umin_val;
		dst_reg->umax_value <<= umax_val;
	}
}

static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	u64 umax_val = src_reg->umax_value;
	u64 umin_val = src_reg->umin_value;

	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);

	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}

static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	struct tnum subreg = tnum_subreg(dst_reg->var_off);
	u32 umax_val = src_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;

	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
	 * be negative, then either:
	 * 1) src_reg might be zero, so the sign bit of the result is
	 *    unknown, so we lose our signed bounds
	 * 2) it's known negative, thus the unsigned bounds capture the
	 *    signed bounds
	 * 3) the signed bounds cross zero, so they tell us nothing
	 *    about the result
	 * If the value in dst_reg is known nonnegative, then again the
	 * unsigned bounds capture the signed bounds.
	 * Thus, in all cases it suffices to blow away our signed bounds
	 * and rely on inferring new ones from the unsigned bounds and
	 * var_off of the result.
	 */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;

	dst_reg->var_off = tnum_rshift(subreg, umin_val);
	dst_reg->u32_min_value >>= umax_val;
	dst_reg->u32_max_value >>= umin_val;

	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}

static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	u64 umax_val = src_reg->umax_value;
	u64 umin_val = src_reg->umin_value;

	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
	 * be negative, then either:
	 * 1) src_reg might be zero, so the sign bit of the result is
	 *    unknown, so we lose our signed bounds
	 * 2) it's known negative, thus the unsigned bounds capture the
	 *    signed bounds
	 * 3) the signed bounds cross zero, so they tell us nothing
	 *    about the result
	 * If the value in dst_reg is known nonnegative, then again the
	 * unsigned bounds capture the signed bounds.
	 * Thus, in all cases it suffices to blow away our signed bounds
	 * and rely on inferring new ones from the unsigned bounds and
	 * var_off of the result.
	 */
	dst_reg->smin_value = S64_MIN;
	dst_reg->smax_value = S64_MAX;
	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
	dst_reg->umin_value >>= umax_val;
	dst_reg->umax_value >>= umin_val;

	/* It's not easy to operate on alu32 bounds here because it depends
	 * on bits being shifted in. Take easy way out and mark unbounded
	 * so we can recalculate later from tnum.
	 */
	__mark_reg32_unbounded(dst_reg);
	__update_reg_bounds(dst_reg);
}

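/* Arithmetic right shift by a known constant: shift the signed 32-bit
 * bounds directly and recompute the unsigned bounds from the resulting
 * tnum.
 */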
static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	u64 umin_val = src_reg->u32_min_value;

	/* Upon reaching here, src_known is true and
	 * umax_val is equal to umin_val.
	 */
	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);

	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);

	/* blow away the dst_reg umin_value/umax_value and rely on
	 * dst_reg var_off to refine the result.
	 */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;

	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}

static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	u64 umin_val = src_reg->umin_value;

	/* Upon reaching here, src_known is true and umax_val is equal
	 * to umin_val.
	 */
	dst_reg->smin_value >>= umin_val;
	dst_reg->smax_value >>= umin_val;

	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);

	/* blow away the dst_reg umin_value/umax_value and rely on
	 * dst_reg var_off to refine the result.
	 */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;

	/* It's not easy to operate on alu32 bounds here because it depends
	 * on bits being shifted in from upper 32-bits. Take easy way out
	 * and mark unbounded so we can recalculate later from tnum.
	 */
	__mark_reg32_unbounded(dst_reg);
	__update_reg_bounds(dst_reg);
}

/* WARNING: This function does calculations on 64-bit values, but the actual
 * execution may occur on 32-bit values. Therefore, things like bitshifts
 * need extra checks in the 32-bit case.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
				      struct bpf_insn *insn,
				      struct bpf_reg_state *dst_reg,
				      struct bpf_reg_state src_reg)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	bool src_known;
	s64 smin_val, smax_val;
	u64 umin_val, umax_val;
	s32 s32_min_val, s32_max_val;
	u32 u32_min_val, u32_max_val;
	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
	u32 dst = insn->dst_reg;
	int ret;
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);

	smin_val = src_reg.smin_value;
	smax_val = src_reg.smax_value;
	umin_val = src_reg.umin_value;
	umax_val = src_reg.umax_value;

	s32_min_val = src_reg.s32_min_value;
	s32_max_val = src_reg.s32_max_value;
	u32_min_val = src_reg.u32_min_value;
	u32_max_val = src_reg.u32_max_value;

	if (alu32) {
		src_known = tnum_subreg_is_const(src_reg.var_off);
		if ((src_known &&
		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
			/* Taint dst register if offset had invalid bounds
			 * derived from e.g. dead branches.
			 */
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}
	} else {
		src_known = tnum_is_const(src_reg.var_off);
		if ((src_known &&
		     (smin_val != smax_val || umin_val != umax_val)) ||
		    smin_val > smax_val || umin_val > umax_val) {
			/* Taint dst register if offset had invalid bounds
			 * derived from e.g. dead branches.
			 */
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}
	}

	if (!src_known &&
	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
	 * There are two classes of instructions: for the first class we track
	 * both alu32 and alu64 sign/unsigned bounds independently; this provides
	 * the greatest amount of precision when alu operations are mixed with
	 * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
	 * BPF_AND and BPF_OR. This is possible because these ops have fairly easy to
	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
	 * See alu32 verifier tests for examples. The second class of
	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
	 * with regards to tracking sign/unsigned bounds because the bits may
	 * cross subreg boundaries in the alu64 case. When this happens we mark
	 * the reg unbounded in the subreg bound space and use the resulting
	 * tnum to calculate an approximation of the sign/unsigned bounds.
	 */
	switch (opcode) {
	case BPF_ADD:
		ret = sanitize_val_alu(env, insn);
		if (ret < 0) {
			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
			return ret;
		}
		scalar32_min_max_add(dst_reg, &src_reg);
		scalar_min_max_add(dst_reg, &src_reg);
		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_SUB:
		ret = sanitize_val_alu(env, insn);
		if (ret < 0) {
			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
			return ret;
		}
		scalar32_min_max_sub(dst_reg, &src_reg);
		scalar_min_max_sub(dst_reg, &src_reg);
		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_MUL:
		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_mul(dst_reg, &src_reg);
		scalar_min_max_mul(dst_reg, &src_reg);
		break;
	case BPF_AND:
		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_and(dst_reg, &src_reg);
		scalar_min_max_and(dst_reg, &src_reg);
		break;
	case BPF_OR:
		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_or(dst_reg, &src_reg);
		scalar_min_max_or(dst_reg, &src_reg);
		break;
	case BPF_XOR:
		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_xor(dst_reg, &src_reg);
		scalar_min_max_xor(dst_reg, &src_reg);
		break;
	case BPF_LSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		if (alu32)
			scalar32_min_max_lsh(dst_reg, &src_reg);
		else
			scalar_min_max_lsh(dst_reg, &src_reg);
		break;
	case BPF_RSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		if (alu32)
			scalar32_min_max_rsh(dst_reg, &src_reg);
		else
			scalar_min_max_rsh(dst_reg, &src_reg);
		break;
	case BPF_ARSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		if (alu32)
			scalar32_min_max_arsh(dst_reg, &src_reg);
		else
			scalar_min_max_arsh(dst_reg, &src_reg);
		break;
	default:
		mark_reg_unknown(env, regs, insn->dst_reg);
		break;
	}

	/* ALU32 ops are zero extended into 64bit register */
	if (alu32)
		zext_32_to_64(dst_reg);

	__update_reg_bounds(dst_reg);
	__reg_deduce_bounds(dst_reg);
	__reg_bound_offset(dst_reg);
	return 0;
}

/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 * and var_off.
 */
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
	u8 opcode = BPF_OP(insn->code);
	int err;

	dst_reg = &regs[insn->dst_reg];
	src_reg = NULL;
	if (dst_reg->type != SCALAR_VALUE)
		ptr_reg = dst_reg;
	else
		/* Make sure ID is cleared otherwise dst_reg min/max could be
		 * incorrectly propagated into other registers by find_equal_scalars()
		 */
		dst_reg->id = 0;
	if (BPF_SRC(insn->code) == BPF_X) {
		src_reg = &regs[insn->src_reg];
		if (src_reg->type != SCALAR_VALUE) {
			if (dst_reg->type != SCALAR_VALUE) {
				/* Combining two pointers by any ALU op yields
				 * an arbitrary scalar. Disallow all math except
				 * pointer subtraction
				 */
				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
					mark_reg_unknown(env, regs, insn->dst_reg);
					return 0;
				}
				verbose(env, "R%d pointer %s pointer prohibited\n",
					insn->dst_reg,
					bpf_alu_string[opcode >> 4]);
				return -EACCES;
			} else {
				/* scalar += pointer
				 * This is legal, but we have to reverse our
				 * src/dest handling in computing the range
				 */
				err = mark_chain_precision(env, insn->dst_reg);
				if (err)
					return err;
				return adjust_ptr_min_max_vals(env, insn,
							       src_reg, dst_reg);
			}
		} else if (ptr_reg) {
			/* pointer += scalar */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
			return adjust_ptr_min_max_vals(env, insn,
						       dst_reg, src_reg);
		}
	} else {
		/* Pretend the src is a reg with a known value, since we only
		 * need to be able to read from this state.
		 */
		off_reg.type = SCALAR_VALUE;
		__mark_reg_known(&off_reg, insn->imm);
		src_reg = &off_reg;
		if (ptr_reg) /* pointer += K */
			return adjust_ptr_min_max_vals(env, insn,
						       ptr_reg, src_reg);
	}

	/* Got here implies adding two SCALAR_VALUEs */
	if (WARN_ON_ONCE(ptr_reg)) {
		print_verifier_state(env, state);
		verbose(env, "verifier internal error: unexpected ptr_reg\n");
		return -EINVAL;
	}
	if (WARN_ON(!src_reg)) {
		print_verifier_state(env, state);
		verbose(env, "verifier internal error: no src_reg\n");
		return -EINVAL;
	}
	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
}

/* check validity of 32-bit and 64-bit arithmetic operations */
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	int err;

	if (opcode == BPF_END || opcode == BPF_NEG) {
		if (opcode == BPF_NEG) {
			if (BPF_SRC(insn->code) != 0 ||
			    insn->src_reg != BPF_REG_0 ||
			    insn->off != 0 || insn->imm != 0) {
				verbose(env, "BPF_NEG uses reserved fields\n");
				return -EINVAL;
			}
		} else {
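			/* BPF_END byte-swaps the destination; imm selects the
			 * swap width (16, 32 or 64) and the BPF_ALU64 class
			 * encoding is reserved.
			 */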
			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
			    BPF_CLASS(insn->code) == BPF_ALU64) {
				verbose(env, "BPF_END uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check src operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		if (is_pointer_value(env, insn->dst_reg)) {
			verbose(env, "R%d pointer arithmetic prohibited\n",
				insn->dst_reg);
			return -EACCES;
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP);
		if (err)
			return err;

	} else if (opcode == BPF_MOV) {

		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || insn->off != 0) {
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		} else {
			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check dest operand, mark as required later */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		if (err)
			return err;

		if (BPF_SRC(insn->code) == BPF_X) {
			struct bpf_reg_state *src_reg = regs + insn->src_reg;
			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;

			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				/* case: R1 = R2
				 * copy register state to dest reg
				 */
				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
					/* Assign src and dst registers the same ID
					 * that will be used by find_equal_scalars()
					 * to propagate min/max range.
					 */
					src_reg->id = ++env->id_gen;
				*dst_reg = *src_reg;
				dst_reg->live |= REG_LIVE_WRITTEN;
				dst_reg->subreg_def = DEF_NOT_SUBREG;
			} else {
				/* R1 = (u32) R2 */
				if (is_pointer_value(env, insn->src_reg)) {
					verbose(env,
						"R%d partial copy of pointer\n",
						insn->src_reg);
					return -EACCES;
				} else if (src_reg->type == SCALAR_VALUE) {
					*dst_reg = *src_reg;
					/* Make sure ID is cleared, otherwise
					 * dst_reg min/max could be incorrectly
					 * propagated into src_reg by find_equal_scalars()
					 */
					dst_reg->id = 0;
					dst_reg->live |= REG_LIVE_WRITTEN;
					dst_reg->subreg_def = env->insn_idx + 1;
				} else {
					mark_reg_unknown(env, regs,
							 insn->dst_reg);
				}
				zext_32_to_64(dst_reg);
			}
		} else {
			/* case: R = imm
			 * remember the value we stored into this reg
			 */
			/* clear any state __mark_reg_known doesn't set */
			mark_reg_unknown(env, regs, insn->dst_reg);
			regs[insn->dst_reg].type = SCALAR_VALUE;
			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				__mark_reg_known(regs + insn->dst_reg,
						 insn->imm);
			} else {
				__mark_reg_known(regs + insn->dst_reg,
						 (u32)insn->imm);
			}
		}

	} else if (opcode > BPF_END) {
		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
		return -EINVAL;

	} else {	/* all other ALU ops: and, sub, xor, add, ... */

		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || insn->off != 0) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		} else {
			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check src2 operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
			verbose(env, "div by zero\n");
			return -EINVAL;
		}

		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;

			if (insn->imm < 0 || insn->imm >= size) {
				verbose(env, "invalid shift %d\n", insn->imm);
				return -EINVAL;
			}
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		if (err)
			return err;

		return adjust_reg_min_max_vals(env, insn);
	}

	return 0;
}

static void __find_good_pkt_pointers(struct bpf_func_state *state,
				     struct bpf_reg_state *dst_reg,
				     enum bpf_reg_type type, u16 new_range)
{
	struct bpf_reg_state *reg;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		if (reg->type == type && reg->id == dst_reg->id)
			/* keep the maximum range already checked */
			reg->range = max(reg->range, new_range);
	}

	bpf_for_each_spilled_reg(i, state, reg) {
		if (!reg)
			continue;
		if (reg->type == type && reg->id == dst_reg->id)
			reg->range = max(reg->range, new_range);
	}
}

static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
				   struct bpf_reg_state *dst_reg,
				   enum bpf_reg_type type,
				   bool range_right_open)
{
	u16 new_range;
	int i;

	if (dst_reg->off < 0 ||
	    (dst_reg->off == 0 && range_right_open))
		/* This doesn't give us any range */
		return;

	if (dst_reg->umax_value > MAX_PACKET_OFF ||
	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
		 * than pkt_end, but that's because it's also less than pkt.
		 */
		return;

	new_range = dst_reg->off;
	if (range_right_open)
		new_range--;

	/* Examples for register markings:
	 *
	 * pkt_data in dst register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 > pkt_end) goto <handle exception>
	 *   <access okay>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 < pkt_end) goto <access okay>
	 *   <handle exception>
	 *
	 *   Where:
	 *     r2 == dst_reg, pkt_end == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * pkt_data in src register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end >= r2) goto <access okay>
	 *   <handle exception>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end <= r2) goto <handle exception>
	 *   <access okay>
	 *
	 *   Where:
	 *     pkt_end == dst_reg, r2 == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
	 * and [r3, r3 + 8-1) respectively is safe to access depending on
	 * the check.
	 */

	/* If our ids match, then we must have the same max_value.  And we
	 * don't care about the other reg's fixed offset, since if it's too big
	 * the range won't allow anything.
	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
	 */
	for (i = 0; i <= vstate->curframe; i++)
		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
					 new_range);
}

static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
{
	struct tnum subreg = tnum_subreg(reg->var_off);
	s32 sval = (s32)val;

	switch (opcode) {
	case BPF_JEQ:
		if (tnum_is_const(subreg))
			return !!tnum_equals_const(subreg, val);
		break;
	case BPF_JNE:
		if (tnum_is_const(subreg))
			return !tnum_equals_const(subreg, val);
		break;
	case BPF_JSET:
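		/* JSET is taken iff (reg & val) != 0: a bit known to be set in
		 * the subregister that is also set in val proves the branch
		 * taken; if no possibly-set bit overlaps val, it is never
		 * taken.
		 */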
		if ((~subreg.mask & subreg.value) & val)
			return 1;
		if (!((subreg.mask | subreg.value) & val))
			return 0;
		break;
	case BPF_JGT:
		if (reg->u32_min_value > val)
			return 1;
		else if (reg->u32_max_value <= val)
			return 0;
		break;
	case BPF_JSGT:
		if (reg->s32_min_value > sval)
			return 1;
		else if (reg->s32_max_value < sval)
			return 0;
		break;
	case BPF_JLT:
		if (reg->u32_max_value < val)
			return 1;
		else if (reg->u32_min_value >= val)
			return 0;
		break;
	case BPF_JSLT:
		if (reg->s32_max_value < sval)
			return 1;
		else if (reg->s32_min_value >= sval)
			return 0;
		break;
	case BPF_JGE:
		if (reg->u32_min_value >= val)
			return 1;
		else if (reg->u32_max_value < val)
			return 0;
		break;
	case BPF_JSGE:
		if (reg->s32_min_value >= sval)
			return 1;
		else if (reg->s32_max_value < sval)
			return 0;
		break;
	case BPF_JLE:
		if (reg->u32_max_value <= val)
			return 1;
		else if (reg->u32_min_value > val)
			return 0;
		break;
	case BPF_JSLE:
		if (reg->s32_max_value <= sval)
			return 1;
		else if (reg->s32_min_value > sval)
			return 0;
		break;
	}

	return -1;
}


static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
{
	s64 sval = (s64)val;

	switch (opcode) {
	case BPF_JEQ:
		if (tnum_is_const(reg->var_off))
			return !!tnum_equals_const(reg->var_off, val);
		break;
	case BPF_JNE:
		if (tnum_is_const(reg->var_off))
			return !tnum_equals_const(reg->var_off, val);
		break;
	case BPF_JSET:
		if ((~reg->var_off.mask & reg->var_off.value) & val)
			return 1;
		if (!((reg->var_off.mask | reg->var_off.value) & val))
			return 0;
		break;
	case BPF_JGT:
		if (reg->umin_value > val)
			return 1;
		else if (reg->umax_value <= val)
			return 0;
		break;
	case BPF_JSGT:
		if (reg->smin_value > sval)
			return 1;
		else if (reg->smax_value < sval)
			return 0;
		break;
	case BPF_JLT:
		if (reg->umax_value < val)
			return 1;
		else if (reg->umin_value >= val)
			return 0;
		break;
	case BPF_JSLT:
		if (reg->smax_value < sval)
			return 1;
		else if (reg->smin_value >= sval)
			return 0;
		break;
	case BPF_JGE:
		if (reg->umin_value >= val)
			return 1;
		else if (reg->umax_value < val)
			return 0;
		break;
	case BPF_JSGE:
		if (reg->smin_value >= sval)
			return 1;
		else if (reg->smax_value < sval)
			return 0;
		break;
	case BPF_JLE:
		if (reg->umax_value <= val)
			return 1;
		else if (reg->umin_value > val)
			return 0;
		break;
	case BPF_JSLE:
		if (reg->smax_value <= sval)
			return 1;
		else if (reg->smin_value > sval)
			return 0;
		break;
	}

	return -1;
}

/* compute branch direction of the expression "if (reg opcode val) goto target;"
 * and return:
 *  1 - branch will be taken and "goto target" will be executed
 *  0 - branch will not be taken and fall-through to next insn
 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
 *      value range is [0,10]
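 *      (whereas "if (reg < 20)" on that range is known taken and
 *      "if (reg > 10)" is known not taken)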
 */
static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
			   bool is_jmp32)
{
	if (__is_pointer_value(false, reg)) {
		if (!reg_type_not_null(reg->type))
			return -1;

		/* If pointer is valid tests against zero will fail so we can
		 * use this to direct branch taken.
		 */
		if (val != 0)
			return -1;

		switch (opcode) {
		case BPF_JEQ:
			return 0;
		case BPF_JNE:
			return 1;
		default:
			return -1;
		}
	}

	if (is_jmp32)
		return is_branch32_taken(reg, val, opcode);
	return is_branch64_taken(reg, val, opcode);
}

/* Adjusts the register min/max values in the case that the dst_reg is the
 * variable register that we are working on, and src_reg is a constant or we're
 * simply doing a BPF_K check.
 * In JEQ/JNE cases we also adjust the var_off values.
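 * E.g. for "if (r1 > 7)" on a 64-bit scalar with unknown bounds, the true
 * branch learns umin_value == 8 and the false branch learns umax_value == 7.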
 */
static void reg_set_min_max(struct bpf_reg_state *true_reg,
			    struct bpf_reg_state *false_reg,
			    u64 val, u32 val32,
			    u8 opcode, bool is_jmp32)
{
	struct tnum false_32off = tnum_subreg(false_reg->var_off);
	struct tnum false_64off = false_reg->var_off;
	struct tnum true_32off = tnum_subreg(true_reg->var_off);
	struct tnum true_64off = true_reg->var_off;
	s64 sval = (s64)val;
	s32 sval32 = (s32)val32;

	/* If the dst_reg is a pointer, we can't learn anything about its
	 * variable offset from the compare (unless src_reg were a pointer into
	 * the same object, but we don't bother with that).
	 * Since false_reg and true_reg have the same type by construction, we
	 * only need to check one of them for pointerness.
	 */
	if (__is_pointer_value(false, false_reg))
		return;

	switch (opcode) {
	case BPF_JEQ:
	case BPF_JNE:
	{
		struct bpf_reg_state *reg =
			opcode == BPF_JEQ ? true_reg : false_reg;

		/* JEQ/JNE comparison doesn't change the register equivalence.
		 * r1 = r2;
		 * if (r1 == 42) goto label;
		 * ...
		 * label: // here both r1 and r2 are known to be 42.
		 *
		 * Hence when marking the register as known, preserve its ID.
		 */
		if (is_jmp32)
			__mark_reg32_known(reg, val32);
		else
			___mark_reg_known(reg, val);
		break;
	}
	case BPF_JSET:
		if (is_jmp32) {
			false_32off = tnum_and(false_32off, tnum_const(~val32));
			if (is_power_of_2(val32))
				true_32off = tnum_or(true_32off,
						     tnum_const(val32));
		} else {
			false_64off = tnum_and(false_64off, tnum_const(~val));
			if (is_power_of_2(val))
				true_64off = tnum_or(true_64off,
						     tnum_const(val));
		}
		break;
	case BPF_JGE:
	case BPF_JGT:
	{
		if (is_jmp32) {
			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;

			false_reg->u32_max_value = min(false_reg->u32_max_value,
						       false_umax);
			true_reg->u32_min_value = max(true_reg->u32_min_value,
						      true_umin);
		} else {
			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;

			false_reg->umax_value = min(false_reg->umax_value, false_umax);
			true_reg->umin_value = max(true_reg->umin_value, true_umin);
		}
		break;
	}
	case BPF_JSGE:
	case BPF_JSGT:
	{
		if (is_jmp32) {
			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;

			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
		} else {
			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;

			false_reg->smax_value = min(false_reg->smax_value, false_smax);
			true_reg->smin_value = max(true_reg->smin_value, true_smin);
		}
		break;
	}
	case BPF_JLE:
	case BPF_JLT:
	{
		if (is_jmp32) {
			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;

			false_reg->u32_min_value = max(false_reg->u32_min_value,
						       false_umin);
			true_reg->u32_max_value = min(true_reg->u32_max_value,
						      true_umax);
		} else {
			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;

			false_reg->umin_value = max(false_reg->umin_value, false_umin);
			true_reg->umax_value = min(true_reg->umax_value, true_umax);
		}
		break;
	}
	case BPF_JSLE:
	case BPF_JSLT:
	{
		if (is_jmp32) {
			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;

			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
		} else {
			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;

			false_reg->smin_value = max(false_reg->smin_value, false_smin);
			true_reg->smax_value = min(true_reg->smax_value, true_smax);
		}
		break;
	}
	default:
		return;
	}

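	/* A JMP32 comparison only constrains the low 32 bits: splice the new
	 * 32-bit tnum into the untouched upper half and re-derive the 64-bit
	 * bounds from it.  For 64-bit comparisons do the reverse and refresh
	 * the 32-bit bounds from the 64-bit state.
	 */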
	if (is_jmp32) {
		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
					     tnum_subreg(false_32off));
		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
					    tnum_subreg(true_32off));
		__reg_combine_32_into_64(false_reg);
		__reg_combine_32_into_64(true_reg);
	} else {
		false_reg->var_off = false_64off;
		true_reg->var_off = true_64off;
		__reg_combine_64_into_32(false_reg);
		__reg_combine_64_into_32(true_reg);
	}
}

/* Same as above, but for the case that dst_reg holds a constant and src_reg is
 * the variable reg.
 */
static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
				struct bpf_reg_state *false_reg,
				u64 val, u32 val32,
				u8 opcode, bool is_jmp32)
{
	/* How can we transform "a <op> b" into "b <op> a"? */
	static const u8 opcode_flip[16] = {
		/* these stay the same */
		[BPF_JEQ  >> 4] = BPF_JEQ,
		[BPF_JNE  >> 4] = BPF_JNE,
		[BPF_JSET >> 4] = BPF_JSET,
		/* these swap "lesser" and "greater" (L and G in the opcodes) */
		[BPF_JGE  >> 4] = BPF_JLE,
		[BPF_JGT  >> 4] = BPF_JLT,
		[BPF_JLE  >> 4] = BPF_JGE,
		[BPF_JLT  >> 4] = BPF_JGT,
		[BPF_JSGE >> 4] = BPF_JSLE,
		[BPF_JSGT >> 4] = BPF_JSLT,
		[BPF_JSLE >> 4] = BPF_JSGE,
		[BPF_JSLT >> 4] = BPF_JSGT
	};
	opcode = opcode_flip[opcode >> 4];
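	/* E.g. "if (5 > r1)", where dst_reg held the constant 5, is handled
	 * as "if (r1 < 5)" so reg_set_min_max() can be reused unchanged.
	 */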
	/* This uses zero as "not present in table"; luckily the zero opcode,
	 * BPF_JA, can't get here.
	 */
	if (opcode)
		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
}

/* Regs are known to be equal, so intersect their min/max/var_off */
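/* E.g. if src is known to be in [0, 100] and dst in [50, 200], a successful
 * "==" comparison narrows both to [50, 100].
 */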
static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
				  struct bpf_reg_state *dst_reg)
{
	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
							dst_reg->umin_value);
	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
							dst_reg->umax_value);
	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
							dst_reg->smin_value);
	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
							dst_reg->smax_value);
	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
							     dst_reg->var_off);
	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(src_reg);
	__update_reg_bounds(dst_reg);
	/* We might have learned something about the sign bit. */
	__reg_deduce_bounds(src_reg);
	__reg_deduce_bounds(dst_reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(src_reg);
	__reg_bound_offset(dst_reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(src_reg);
	__update_reg_bounds(dst_reg);
}

static void reg_combine_min_max(struct bpf_reg_state *true_src,
				struct bpf_reg_state *true_dst,
				struct bpf_reg_state *false_src,
				struct bpf_reg_state *false_dst,
				u8 opcode)
{
	switch (opcode) {
	case BPF_JEQ:
		__reg_combine_min_max(true_src, true_dst);
		break;
	case BPF_JNE:
		__reg_combine_min_max(false_src, false_dst);
		break;
	}
}

static void mark_ptr_or_null_reg(struct bpf_func_state *state,
				 struct bpf_reg_state *reg, u32 id,
				 bool is_null)
{
	if (reg_type_may_be_null(reg->type) && reg->id == id &&
	    !WARN_ON_ONCE(!reg->id)) {
		/* Old offset (both fixed and variable parts) should
		 * have been known-zero, because we don't allow pointer
		 * arithmetic on pointers that might be NULL.
		 */
		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
				 !tnum_equals_const(reg->var_off, 0) ||
				 reg->off)) {
			__mark_reg_known_zero(reg);
			reg->off = 0;
		}
		if (is_null) {
			reg->type = SCALAR_VALUE;
		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
			const struct bpf_map *map = reg->map_ptr;

			if (map->inner_map_meta) {
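				/* A lookup in a map-in-map returns a pointer to
				 * the inner map, described by inner_map_meta.
				 */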
				reg->type = CONST_PTR_TO_MAP;
				reg->map_ptr = map->inner_map_meta;
			} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
				reg->type = PTR_TO_XDP_SOCK;
			} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
				   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
				reg->type = PTR_TO_SOCKET;
			} else {
				reg->type = PTR_TO_MAP_VALUE;
			}
		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
			reg->type = PTR_TO_SOCKET;
		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
			reg->type = PTR_TO_SOCK_COMMON;
		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
			reg->type = PTR_TO_TCP_SOCK;
		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
			reg->type = PTR_TO_BTF_ID;
		} else if (reg->type == PTR_TO_MEM_OR_NULL) {
			reg->type = PTR_TO_MEM;
		} else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
			reg->type = PTR_TO_RDONLY_BUF;
		} else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
			reg->type = PTR_TO_RDWR_BUF;
		}
		if (is_null) {
			/* We don't need id and ref_obj_id from this point
			 * onwards anymore, so reset them to give state pruning
			 * a chance to take effect.
			 */
			reg->id = 0;
			reg->ref_obj_id = 0;
		} else if (!reg_may_point_to_spin_lock(reg)) {
			/* For not-NULL ptr, reg->ref_obj_id will be reset
			 * in release_reg_references().
			 *
			 * reg->id is still used by spin_lock ptr. Other
			 * than spin_lock ptr type, reg->id can be reset.
			 */
			reg->id = 0;
		}
	}
}

static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
				    bool is_null)
{
	struct bpf_reg_state *reg;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);

	bpf_for_each_spilled_reg(i, state, reg) {
		if (!reg)
			continue;
		mark_ptr_or_null_reg(state, reg, id, is_null);
	}
}

/* The logic is similar to find_good_pkt_pointers(), both could eventually
 * be folded together at some point.
 */
static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
				  bool is_null)
{
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs;
	u32 ref_obj_id = regs[regno].ref_obj_id;
	u32 id = regs[regno].id;
	int i;

	if (ref_obj_id && ref_obj_id == id && is_null)
		/* regs[regno] is in the " == NULL" branch.
		 * No one could have freed the reference state before
		 * doing the NULL check.
		 */
		WARN_ON_ONCE(release_reference_state(state, id));

	for (i = 0; i <= vstate->curframe; i++)
		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
}

static bool try_match_pkt_pointers(const struct bpf_insn *insn,
				   struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   struct bpf_verifier_state *this_branch,
				   struct bpf_verifier_state *other_branch)
{
	if (BPF_SRC(insn->code) != BPF_X)
		return false;

	/* Pointers are always 64-bit. */
	if (BPF_CLASS(insn->code) == BPF_JMP32)
		return false;

	switch (BPF_OP(insn->code)) {
	case BPF_JGT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, true);
		} else {
			return false;
		}
		break;
	case BPF_JLT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, false);
		} else {
			return false;
		}
		break;
	case BPF_JGE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, false);
		} else {
			return false;
		}
		break;
	case BPF_JLE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, true);
		} else {
			return false;
		}
		break;
	default:
		return false;
	}

	return true;
}

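/* Copy the known bounds of known_reg to every register (in any frame,
 * including spilled ones) that carries the same id.  E.g. after "r2 = r1;
 * if (r1 > 8) ..." both r1 and r2 pick up umin_value == 9 on the taken branch.
 */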
static void find_equal_scalars(struct bpf_verifier_state *vstate,
			       struct bpf_reg_state *known_reg)
{
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int i, j;

	for (i = 0; i <= vstate->curframe; i++) {
		state = vstate->frame[i];
		for (j = 0; j < MAX_BPF_REG; j++) {
			reg = &state->regs[j];
			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
				*reg = *known_reg;
		}

		bpf_for_each_spilled_reg(j, state, reg) {
			if (!reg)
				continue;
			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
				*reg = *known_reg;
		}
	}
}

static int check_cond_jmp_op(struct bpf_verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct bpf_verifier_state *this_branch = env->cur_state;
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
	u8 opcode = BPF_OP(insn->code);
	bool is_jmp32;
	int pred = -1;
	int err;

	/* Only conditional jumps are expected to reach here. */
	if (opcode == BPF_JA || opcode > BPF_JSLE) {
		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
		return -EINVAL;
	}

	if (BPF_SRC(insn->code) == BPF_X) {
		if (insn->imm != 0) {
			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
			return -EINVAL;
		}

		/* check src1 operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;

		if (is_pointer_value(env, insn->src_reg)) {
			verbose(env, "R%d pointer comparison prohibited\n",
				insn->src_reg);
			return -EACCES;
		}
		src_reg = &regs[insn->src_reg];
	} else {
		if (insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
			return -EINVAL;
		}
	}

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];
	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;

	if (BPF_SRC(insn->code) == BPF_K) {
		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
	} else if (src_reg->type == SCALAR_VALUE &&
		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
		pred = is_branch_taken(dst_reg,
				       tnum_subreg(src_reg->var_off).value,
				       opcode,
				       is_jmp32);
	} else if (src_reg->type == SCALAR_VALUE &&
		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
		pred = is_branch_taken(dst_reg,
				       src_reg->var_off.value,
				       opcode,
				       is_jmp32);
	}

	if (pred >= 0) {
		/* If we get here with a dst_reg pointer type it is because
		 * above is_branch_taken() special cased the 0 comparison.
		 */
		if (!__is_pointer_value(false, dst_reg))
			err = mark_chain_precision(env, insn->dst_reg);
		if (BPF_SRC(insn->code) == BPF_X && !err)
			err = mark_chain_precision(env, insn->src_reg);
		if (err)
			return err;
	}
	if (pred == 1) {
		/* only follow the goto, ignore fall-through */
		*insn_idx += insn->off;
		return 0;
	} else if (pred == 0) {
		/* only follow fall-through branch, since
		 * that's where the program will go
		 */
		return 0;
	}

	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
				  false);
	if (!other_branch)
		return -EFAULT;
	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;

	/* detect if we are comparing against a constant value so we can adjust
	 * our min/max values for our dst register.
	 * this is only legit if both are scalars (or pointers to the same
	 * object, I suppose, but we don't support that right now), because
	 * otherwise the different base pointers mean the offsets aren't
	 * comparable.
	 */
	if (BPF_SRC(insn->code) == BPF_X) {
		struct bpf_reg_state *src_reg = &regs[insn->src_reg];

		if (dst_reg->type == SCALAR_VALUE &&
		    src_reg->type == SCALAR_VALUE) {
			if (tnum_is_const(src_reg->var_off) ||
			    (is_jmp32 &&
			     tnum_is_const(tnum_subreg(src_reg->var_off))))
				reg_set_min_max(&other_branch_regs[insn->dst_reg],
						dst_reg,
						src_reg->var_off.value,
						tnum_subreg(src_reg->var_off).value,
						opcode, is_jmp32);
			else if (tnum_is_const(dst_reg->var_off) ||
				 (is_jmp32 &&
				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
						    src_reg,
						    dst_reg->var_off.value,
						    tnum_subreg(dst_reg->var_off).value,
						    opcode, is_jmp32);
			else if (!is_jmp32 &&
				 (opcode == BPF_JEQ || opcode == BPF_JNE))
				/* Comparing for equality, we can combine knowledge */
				reg_combine_min_max(&other_branch_regs[insn->src_reg],
						    &other_branch_regs[insn->dst_reg],
						    src_reg, dst_reg, opcode);
			if (src_reg->id &&
			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
				find_equal_scalars(this_branch, src_reg);
				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
			}

		}
	} else if (dst_reg->type == SCALAR_VALUE) {
		reg_set_min_max(&other_branch_regs[insn->dst_reg],
					dst_reg, insn->imm, (u32)insn->imm,
					opcode, is_jmp32);
	}

	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
		find_equal_scalars(this_branch, dst_reg);
		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
	}

	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
	 * NOTE: these optimizations below are related with pointer comparison
	 *       which will never be JMP32.
	 */
	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
	    reg_type_may_be_null(dst_reg->type)) {
		/* Mark all identical registers in each branch as either
		 * safe or unknown depending R == 0 or R != 0 conditional.
		 */
		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
				      opcode == BPF_JNE);
		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
				      opcode == BPF_JEQ);
	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
					   this_branch, other_branch) &&
		   is_pointer_value(env, insn->dst_reg)) {
		verbose(env, "R%d pointer comparison prohibited\n",
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level & BPF_LOG_LEVEL)
		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
	return 0;
}

/* verify BPF_LD_IMM64 instruction */
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *dst_reg;
	struct bpf_map *map;
	int err;

	if (BPF_SIZE(insn->code) != BPF_DW) {
		verbose(env, "invalid BPF_LD_IMM insn\n");
		return -EINVAL;
	}
	if (insn->off != 0) {
		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
		return -EINVAL;
	}

	err = check_reg_arg(env, insn->dst_reg, DST_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];
	if (insn->src_reg == 0) {
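		/* Plain BPF_LD_IMM64 (src_reg == 0): the 64-bit constant is
		 * split across two instructions, low 32 bits in this insn and
		 * high 32 bits in the next one.
		 */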
		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;

		dst_reg->type = SCALAR_VALUE;
		__mark_reg_known(&regs[insn->dst_reg], imm);
		return 0;
	}

	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
		mark_reg_known_zero(env, regs, insn->dst_reg);

		dst_reg->type = aux->btf_var.reg_type;
		switch (dst_reg->type) {
		case PTR_TO_MEM:
			dst_reg->mem_size = aux->btf_var.mem_size;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_PERCPU_BTF_ID:
			dst_reg->btf_id = aux->btf_var.btf_id;
			break;
		default:
			verbose(env, "bpf verifier is misconfigured\n");
			return -EFAULT;
		}
		return 0;
	}

	map = env->used_maps[aux->map_index];
	mark_reg_known_zero(env, regs, insn->dst_reg);
	dst_reg->map_ptr = map;

	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
		dst_reg->type = PTR_TO_MAP_VALUE;
		dst_reg->off = aux->map_off;
		if (map_value_has_spin_lock(map))
			dst_reg->id = ++env->id_gen;
	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
		dst_reg->type = CONST_PTR_TO_MAP;
	} else {
		verbose(env, "bpf verifier is misconfigured\n");
		return -EINVAL;
	}

	return 0;
}

static bool may_access_skb(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_SOCKET_FILTER:
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
		return true;
	default:
		return false;
	}
}

/* verify safety of LD_ABS|LD_IND instructions:
 * - they can only appear in the programs where ctx == skb
 * - since they are wrappers of function calls, they scratch R1-R5 registers,
 *   preserve R6-R9, and store return value into R0
 *
 * Implicit input:
 *   ctx == skb == R6 == CTX
 *
 * Explicit input:
 *   SRC == any register
 *   IMM == 32-bit immediate
 *
 * Output:
 *   R0 - 8/16/32-bit skb data converted to cpu endianness
 */
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	static const int ctx_reg = BPF_REG_6;
	u8 mode = BPF_MODE(insn->code);
	int i, err;

	if (!may_access_skb(resolve_prog_type(env->prog))) {
		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
		return -EINVAL;
	}

	if (!env->ops->gen_ld_abs) {
		verbose(env, "bpf verifier is misconfigured\n");
		return -EINVAL;
	}

	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
	    BPF_SIZE(insn->code) == BPF_DW ||
	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
		return -EINVAL;
	}

	/* check whether implicit source operand (register R6) is readable */
	err = check_reg_arg(env, ctx_reg, SRC_OP);
	if (err)
		return err;

	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
	 * gen_ld_abs() may terminate the program at runtime, leading to a
	 * reference leak.
	 */
	err = check_reference_leak(env);
	if (err) {
		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
		return err;
	}

	if (env->cur_state->active_spin_lock) {
		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
		return -EINVAL;
	}

	if (regs[ctx_reg].type != PTR_TO_CTX) {
		verbose(env,
			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
		return -EINVAL;
	}

	if (mode == BPF_IND) {
		/* check explicit source operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;
	}

	err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
	if (err < 0)
		return err;

	/* reset caller saved regs to unreadable */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* mark destination R0 register as readable, since it contains
	 * the value fetched from the packet.
	 * Already marked as written above.
	 */
	mark_reg_unknown(env, regs, BPF_REG_0);
	/* ld_abs load up to 32-bit skb data. */
	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
	return 0;
}

static int check_return_code(struct bpf_verifier_env *env)
{
	struct tnum enforce_attach_type_range = tnum_unknown;
	const struct bpf_prog *prog = env->prog;
	struct bpf_reg_state *reg;
	struct tnum range = tnum_range(0, 1);
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	int err;
	const bool is_subprog = env->cur_state->frame[0]->subprogno;

	/* LSM and struct_ops func-ptr's return type could be "void" */
	if (!is_subprog &&
	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
	     prog_type == BPF_PROG_TYPE_LSM) &&
	    !prog->aux->attach_func_proto->type)
		return 0;

	/* eBPF calling convention is such that R0 is used
	 * to return the value from the eBPF program.
	 * Make sure that it's readable at this time
	 * of bpf_exit, which means that the program wrote
	 * something into it earlier.
	 */
	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, BPF_REG_0)) {
		verbose(env, "R0 leaks addr as return value\n");
		return -EACCES;
	}

	reg = cur_regs(env) + BPF_REG_0;
	if (is_subprog) {
		if (reg->type != SCALAR_VALUE) {
			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
				reg_type_str[reg->type]);
			return -EINVAL;
		}
		return 0;
	}

	switch (prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
			range = tnum_range(1, 1);
		break;
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
			range = tnum_range(0, 3);
			enforce_attach_type_range = tnum_range(2, 3);
		}
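		/* Only egress may return 2 or 3; a program that can do so is
		 * tied to this attach type via enforce_expected_attach_type
		 * below.
		 */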
		break;
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		break;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
		if (!env->prog->aux->attach_btf_id)
			return 0;
		range = tnum_const(0);
		break;
	case BPF_PROG_TYPE_TRACING:
		switch (env->prog->expected_attach_type) {
		case BPF_TRACE_FENTRY:
		case BPF_TRACE_FEXIT:
			range = tnum_const(0);
			break;
		case BPF_TRACE_RAW_TP:
		case BPF_MODIFY_RETURN:
			return 0;
		case BPF_TRACE_ITER:
			break;
		default:
			return -ENOTSUPP;
		}
		break;
	case BPF_PROG_TYPE_SK_LOOKUP:
		range = tnum_range(SK_DROP, SK_PASS);
		break;
	case BPF_PROG_TYPE_EXT:
		/* freplace program can return anything as its return value
		 * depends on the to-be-replaced kernel func or bpf program.
		 */
	default:
		return 0;
	}

	if (reg->type != SCALAR_VALUE) {
		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
			reg_type_str[reg->type]);
		return -EINVAL;
	}

	if (!tnum_in(range, reg->var_off)) {
		char tn_buf[48];

		verbose(env, "At program exit the register R0 ");
		if (!tnum_is_unknown(reg->var_off)) {
			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "has value %s", tn_buf);
		} else {
			verbose(env, "has unknown scalar value");
		}
		tnum_strn(tn_buf, sizeof(tn_buf), range);
		verbose(env, " should have been in %s\n", tn_buf);
		return -EINVAL;
	}

	if (!tnum_is_unknown(enforce_attach_type_range) &&
	    tnum_in(enforce_attach_type_range, reg->var_off))
		env->prog->enforce_expected_attach_type = 1;
	return 0;
}

/* non-recursive DFS pseudo code
 * 1  procedure DFS-iterative(G,v):
 * 2      label v as discovered
 * 3      let S be a stack
 * 4      S.push(v)
 * 5      while S is not empty
 * 6            t <- S.pop()
 * 7            if t is what we're looking for:
 * 8                return t
 * 9            for all edges e in G.adjacentEdges(t) do
 * 10               if edge e is already labelled
 * 11                   continue with the next edge
 * 12               w <- G.adjacentVertex(t,e)
 * 13               if vertex w is not discovered and not explored
 * 14                   label e as tree-edge
 * 15                   label w as discovered
 * 16                   S.push(w)
 * 17                   continue at 5
 * 18               else if vertex w is discovered
 * 19                   label e as back-edge
 * 20               else
 * 21                   // vertex w is explored
 * 22                   label e as forward- or cross-edge
 * 23           label t as explored
 * 24           S.pop()
 *
 * convention:
 * 0x10 - discovered
 * 0x11 - discovered and fall-through edge labelled
 * 0x12 - discovered and fall-through and branch edges labelled
 * 0x20 - explored
 */

enum {
	DISCOVERED = 0x10,
	EXPLORED = 0x20,
	FALLTHROUGH = 1,
	BRANCH = 2,
};

static u32 state_htab_size(struct bpf_verifier_env *env)
{
	return env->prog->len;
}

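/* Bucket explored states by instruction index xor'ed with the callsite, so
 * that states of the same instruction reached via different call sites do
 * not all land in one hash chain.
 */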
static struct bpf_verifier_state_list **explored_state(
					struct bpf_verifier_env *env,
					int idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_func_state *state = cur->frame[cur->curframe];

	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
7956 7957 7958 7959
}

static void init_explored_state(struct bpf_verifier_env *env, int idx)
{
	env->insn_aux_data[idx].prune_point = true;
}

/* t, w, e - match pseudo-code above:
 * t - index of current instruction
 * w - next instruction
 * e - edge
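 *
 * Returns 1 if a new vertex was pushed onto the DFS stack (the caller should
 * continue from it), 0 if the edge was fully handled, and a negative errno on
 * malformed jumps, disallowed back-edges or stack overflow.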
 */
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
		     bool loop_ok)
{
	int *insn_stack = env->cfg.insn_stack;
	int *insn_state = env->cfg.insn_state;

	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
		return 0;

	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
		return 0;

	if (w < 0 || w >= env->prog->len) {
		verbose_linfo(env, t, "%d: ", t);
		verbose(env, "jump out of range from insn %d to %d\n", t, w);
		return -EINVAL;
	}

	if (e == BRANCH)
		/* mark branch target for state pruning */
		init_explored_state(env, w);

	if (insn_state[w] == 0) {
		/* tree-edge */
		insn_state[t] = DISCOVERED | e;
		insn_state[w] = DISCOVERED;
		if (env->cfg.cur_stack >= env->prog->len)
			return -E2BIG;
		insn_stack[env->cfg.cur_stack++] = w;
		return 1;
	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
		if (loop_ok && env->bpf_capable)
			return 0;
		verbose_linfo(env, t, "%d: ", t);
		verbose_linfo(env, w, "%d: ", w);
		verbose(env, "back-edge from insn %d to %d\n", t, w);
		return -EINVAL;
	} else if (insn_state[w] == EXPLORED) {
		/* forward- or cross-edge */
		insn_state[t] = DISCOVERED | e;
	} else {
		verbose(env, "insn state internal bug\n");
		return -EFAULT;
	}
	return 0;
}

/* non-recursive depth-first-search to detect loops in BPF program
 * loop == back-edge in directed graph
 */
static int check_cfg(struct bpf_verifier_env *env)
{
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int *insn_stack, *insn_state;
	int ret = 0;
	int i, t;

	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_state)
		return -ENOMEM;

	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_stack) {
		kvfree(insn_state);
		return -ENOMEM;
	}

	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
	insn_stack[0] = 0; /* 0 is the first instruction */
	env->cfg.cur_stack = 1;

peek_stack:
	if (env->cfg.cur_stack == 0)
		goto check_state;
	t = insn_stack[env->cfg.cur_stack - 1];

	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
		u8 opcode = BPF_OP(insns[t].code);

		if (opcode == BPF_EXIT) {
			goto mark_explored;
		} else if (opcode == BPF_CALL) {
			ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;
			if (t + 1 < insn_cnt)
				init_explored_state(env, t + 1);
			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
				init_explored_state(env, t);
				ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
						env, false);
				if (ret == 1)
					goto peek_stack;
				else if (ret < 0)
					goto err_free;
			}
		} else if (opcode == BPF_JA) {
			if (BPF_SRC(insns[t].code) != BPF_K) {
				ret = -EINVAL;
				goto err_free;
			}
			/* unconditional jump with single edge */
			ret = push_insn(t, t + insns[t].off + 1,
					FALLTHROUGH, env, true);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;
			/* unconditional jmp is not a good pruning point,
			 * but it's marked, since backtracking needs
			 * to record jmp history in is_state_visited().
			 */
			init_explored_state(env, t + insns[t].off + 1);
			/* tell verifier to check for equivalent states
			 * after every call and jump
			 */
			if (t + 1 < insn_cnt)
				init_explored_state(env, t + 1);
		} else {
			/* conditional jump with two edges */
			init_explored_state(env, t);
			ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;

			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;
		}
	} else {
		/* all other non-branch instructions with single
		 * fall-through edge
		 */
		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
		if (ret == 1)
			goto peek_stack;
		else if (ret < 0)
			goto err_free;
	}

mark_explored:
	insn_state[t] = EXPLORED;
	if (env->cfg.cur_stack-- <= 0) {
		verbose(env, "pop stack internal bug\n");
		ret = -EFAULT;
		goto err_free;
	}
	goto peek_stack;

check_state:
	for (i = 0; i < insn_cnt; i++) {
		if (insn_state[i] != EXPLORED) {
			verbose(env, "unreachable insn %d\n", i);
			ret = -EINVAL;
			goto err_free;
		}
	}
	ret = 0; /* cfg looks good */

err_free:
	kvfree(insn_state);
	kvfree(insn_stack);
	env->cfg.insn_state = env->cfg.insn_stack = NULL;
	return ret;
}

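/* Used when no BTF func info was supplied: LD_ABS/LD_IND and tail calls are
 * then only allowed in the main program, not in subprograms.
 */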
static int check_abnormal_return(struct bpf_verifier_env *env)
{
	int i;

	for (i = 1; i < env->subprog_cnt; i++) {
		if (env->subprog_info[i].has_ld_abs) {
			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
			return -EINVAL;
		}
		if (env->subprog_info[i].has_tail_call) {
			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
			return -EINVAL;
		}
	}
	return 0;
}

/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE	8
#define MAX_FUNCINFO_REC_SIZE	252

static int check_btf_func(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	const struct btf_type *type, *func_proto, *ret_type;
	u32 i, nfuncs, urec_size, min_size;
	u32 krec_size = sizeof(struct bpf_func_info);
	struct bpf_func_info *krecord;
8171
	struct bpf_func_info_aux *info_aux = NULL;
M
Martin KaFai Lau 已提交
8172 8173
	struct bpf_prog *prog;
	const struct btf *btf;
Y
Yonghong Song 已提交
8174
	void __user *urecord;
8175
	u32 prev_offset = 0;
8176
	bool scalar_return;
8177
	int ret = -ENOMEM;
Y
Yonghong Song 已提交
8178 8179

	nfuncs = attr->func_info_cnt;
8180 8181 8182
	if (!nfuncs) {
		if (check_abnormal_return(env))
			return -EINVAL;
Y
Yonghong Song 已提交
8183
		return 0;
8184
	}
Y
Yonghong Song 已提交
8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198

	if (nfuncs != env->subprog_cnt) {
		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
		return -EINVAL;
	}

	urec_size = attr->func_info_rec_size;
	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
	    urec_size > MAX_FUNCINFO_REC_SIZE ||
	    urec_size % sizeof(u32)) {
		verbose(env, "invalid func info rec size %u\n", urec_size);
		return -EINVAL;
	}

M
Martin KaFai Lau 已提交
8199 8200
	prog = env->prog;
	btf = prog->aux->btf;
Y
Yonghong Song 已提交
8201 8202 8203 8204

	urecord = u64_to_user_ptr(attr->func_info);
	min_size = min_t(u32, krec_size, urec_size);

8205
	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
M
Martin KaFai Lau 已提交
8206 8207
	if (!krecord)
		return -ENOMEM;
8208 8209 8210
	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
	if (!info_aux)
		goto err_free;
8211

Y
Yonghong Song 已提交
8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222
	for (i = 0; i < nfuncs; i++) {
		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
		if (ret) {
			if (ret == -E2BIG) {
				verbose(env, "nonzero tailing record in func info");
				/* set the size kernel expects so loader can zero
				 * out the rest of the record.
				 */
				if (put_user(min_size, &uattr->func_info_rec_size))
					ret = -EFAULT;
			}
M
Martin KaFai Lau 已提交
8223
			goto err_free;
Y
Yonghong Song 已提交
8224 8225
		}

8226
		if (copy_from_user(&krecord[i], urecord, min_size)) {
Y
Yonghong Song 已提交
8227
			ret = -EFAULT;
M
Martin KaFai Lau 已提交
8228
			goto err_free;
Y
Yonghong Song 已提交
8229 8230
		}

8231
		/* check insn_off */
8232
		ret = -EINVAL;
Y
Yonghong Song 已提交
8233
		if (i == 0) {
8234
			if (krecord[i].insn_off) {
Y
Yonghong Song 已提交
8235
				verbose(env,
8236 8237
					"nonzero insn_off %u for the first func info record",
					krecord[i].insn_off);
M
Martin KaFai Lau 已提交
8238
				goto err_free;
Y
Yonghong Song 已提交
8239
			}
8240
		} else if (krecord[i].insn_off <= prev_offset) {
Y
Yonghong Song 已提交
8241 8242
			verbose(env,
				"same or smaller insn offset (%u) than previous func info record (%u)",
8243
				krecord[i].insn_off, prev_offset);
M
Martin KaFai Lau 已提交
8244
			goto err_free;
Y
Yonghong Song 已提交
8245 8246
		}

8247
		if (env->subprog_info[i].start != krecord[i].insn_off) {
Y
Yonghong Song 已提交
8248
			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
M
Martin KaFai Lau 已提交
8249
			goto err_free;
Y
Yonghong Song 已提交
8250 8251 8252
		}

		/* check type_id */
8253
		type = btf_type_by_id(btf, krecord[i].type_id);
8254
		if (!type || !btf_type_is_func(type)) {
Y
Yonghong Song 已提交
8255
			verbose(env, "invalid type id %d in func info",
8256
				krecord[i].type_id);
M
Martin KaFai Lau 已提交
8257
			goto err_free;
Y
Yonghong Song 已提交
8258
		}
8259
		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276

		func_proto = btf_type_by_id(btf, type->type);
		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
			/* btf_func_check() already verified it during BTF load */
			goto err_free;
		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
		scalar_return =
			btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
			goto err_free;
		}
		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
			goto err_free;
		}

8277
		prev_offset = krecord[i].insn_off;
Y
Yonghong Song 已提交
8278 8279 8280
		urecord += urec_size;
	}

8281 8282
	prog->aux->func_info = krecord;
	prog->aux->func_info_cnt = nfuncs;
8283
	prog->aux->func_info_aux = info_aux;
Y
Yonghong Song 已提交
8284 8285
	return 0;

M
Martin KaFai Lau 已提交
8286
err_free:
8287
	kvfree(krecord);
8288
	kfree(info_aux);
Y
Yonghong Song 已提交
8289 8290 8291
	return ret;
}

8292 8293
static void adjust_btf_func(struct bpf_verifier_env *env)
{
8294
	struct bpf_prog_aux *aux = env->prog->aux;
8295 8296
	int i;

8297
	if (!aux->func_info)
8298 8299 8300
		return;

	for (i = 0; i < env->subprog_cnt; i++)
8301
		aux->func_info[i].insn_off = env->subprog_info[i].start;
8302 8303
}

#define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
		sizeof(((struct bpf_line_info *)(0))->line_col))
#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE

static int check_btf_line(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
	struct bpf_subprog_info *sub;
	struct bpf_line_info *linfo;
	struct bpf_prog *prog;
	const struct btf *btf;
	void __user *ulinfo;
	int err;

	nr_linfo = attr->line_info_cnt;
	if (!nr_linfo)
		return 0;

	rec_size = attr->line_info_rec_size;
	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
	    rec_size > MAX_LINEINFO_REC_SIZE ||
	    rec_size & (sizeof(u32) - 1))
		return -EINVAL;

	/* Need to zero it in case userspace passes in a smaller
	 * bpf_line_info object.
	 */
	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
			 GFP_KERNEL | __GFP_NOWARN);
	if (!linfo)
		return -ENOMEM;

	prog = env->prog;
	btf = prog->aux->btf;

	s = 0;
	sub = env->subprog_info;
	ulinfo = u64_to_user_ptr(attr->line_info);
	expected_size = sizeof(struct bpf_line_info);
	ncopy = min_t(u32, expected_size, rec_size);
	for (i = 0; i < nr_linfo; i++) {
		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
		if (err) {
			if (err == -E2BIG) {
				verbose(env, "nonzero tailing record in line_info");
				if (put_user(expected_size,
					     &uattr->line_info_rec_size))
					err = -EFAULT;
			}
			goto err_free;
		}

		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
			err = -EFAULT;
			goto err_free;
		}

		/*
		 * Check insn_off to ensure
		 * 1) strictly increasing AND
		 * 2) bounded by prog->len
		 *
		 * The linfo[0].insn_off == 0 check logically falls into
		 * the later "missing bpf_line_info for func..." case
		 * because the first linfo[0].insn_off must be the
		 * first sub also and the first sub must have
		 * subprog_info[0].start == 0.
		 */
		if ((i && linfo[i].insn_off <= prev_offset) ||
		    linfo[i].insn_off >= prog->len) {
			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
				i, linfo[i].insn_off, prev_offset,
				prog->len);
			err = -EINVAL;
			goto err_free;
		}

		if (!prog->insnsi[linfo[i].insn_off].code) {
			verbose(env,
				"Invalid insn code at line_info[%u].insn_off\n",
				i);
			err = -EINVAL;
			goto err_free;
		}

		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
			err = -EINVAL;
			goto err_free;
		}

		if (s != env->subprog_cnt) {
			if (linfo[i].insn_off == sub[s].start) {
				sub[s].linfo_idx = i;
				s++;
			} else if (sub[s].start < linfo[i].insn_off) {
				verbose(env, "missing bpf_line_info for func#%u\n", s);
				err = -EINVAL;
				goto err_free;
			}
		}

		prev_offset = linfo[i].insn_off;
		ulinfo += rec_size;
	}

	if (s != env->subprog_cnt) {
		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
			env->subprog_cnt - s, s);
		err = -EINVAL;
		goto err_free;
	}

	prog->aux->linfo = linfo;
	prog->aux->nr_linfo = nr_linfo;

	return 0;

err_free:
	kvfree(linfo);
	return err;
}

static int check_btf_info(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct btf *btf;
	int err;

	if (!attr->func_info_cnt && !attr->line_info_cnt) {
		if (check_abnormal_return(env))
			return -EINVAL;
		return 0;
	}

	btf = btf_get_by_fd(attr->prog_btf_fd);
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	env->prog->aux->btf = btf;

	err = check_btf_func(env, attr, uattr);
	if (err)
		return err;

	err = check_btf_line(env, attr, uattr);
	if (err)
		return err;

	return 0;
}

/* check %cur's range satisfies %old's */
static bool range_within(struct bpf_reg_state *old,
			 struct bpf_reg_state *cur)
{
	return old->umin_value <= cur->umin_value &&
	       old->umax_value >= cur->umax_value &&
	       old->smin_value <= cur->smin_value &&
	       old->smax_value >= cur->smax_value;
}
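
/* Illustrative example (hypothetical bounds, not taken from the original
 * source): range_within() only accepts a current register whose bounds are
 * contained in the already-verified old bounds, i.e. the current state may
 * be narrower but never wider:
 *
 *   old [umin=0, umax=100]  vs  cur [umin=10, umax=20]   -> true
 *   old [umin=0, umax=100]  vs  cur [umin=10, umax=200]  -> false
 */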

/* Maximum number of register states that can exist at once */
#define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
struct idpair {
	u32 old;
	u32 cur;
};

/* If in the old state two registers had the same id, then they need to have
 * the same id in the new state as well.  But that id could be different from
 * the old state, so we need to track the mapping from old to new ids.
 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
 * regs with a different old id could still have new id 9, we don't care about
 * that.
 * So we look through our idmap to see if this old id has been seen before.  If
 * so, we require the new id to match; otherwise, we add the id pair to the map.
 */
static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
{
	unsigned int i;

	for (i = 0; i < ID_MAP_SIZE; i++) {
		if (!idmap[i].old) {
			/* Reached an empty slot; haven't seen this id before */
			idmap[i].old = old_id;
			idmap[i].cur = cur_id;
			return true;
		}
		if (idmap[i].old == old_id)
			return idmap[i].cur == cur_id;
	}
	/* We ran out of idmap slots, which should be impossible */
	WARN_ON_ONCE(1);
	return false;
}
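
/* Illustrative example (hypothetical ids, not taken from the original
 * source): if two registers shared id 5 in the old state and the first of
 * them now carries id 9, check_ids(5, 9, idmap) records the pair
 * {old=5, cur=9}.  A later check_ids(5, 9, idmap) then returns true, while
 * check_ids(5, 7, idmap) returns false: registers that shared an id in the
 * old state must still share one in the new state.
 */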

static void clean_func_state(struct bpf_verifier_env *env,
			     struct bpf_func_state *st)
{
	enum bpf_reg_liveness live;
	int i, j;

	for (i = 0; i < BPF_REG_FP; i++) {
		live = st->regs[i].live;
		/* liveness must not touch this register anymore */
		st->regs[i].live |= REG_LIVE_DONE;
		if (!(live & REG_LIVE_READ))
			/* since the register is unused, clear its state
			 * to make further comparison simpler
			 */
			__mark_reg_not_init(env, &st->regs[i]);
	}

	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
		live = st->stack[i].spilled_ptr.live;
		/* liveness must not touch this stack slot anymore */
		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
		if (!(live & REG_LIVE_READ)) {
			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
			for (j = 0; j < BPF_REG_SIZE; j++)
				st->stack[i].slot_type[j] = STACK_INVALID;
		}
	}
}

static void clean_verifier_state(struct bpf_verifier_env *env,
				 struct bpf_verifier_state *st)
{
	int i;

	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
		/* all regs in this state in all frames were already marked */
		return;

	for (i = 0; i <= st->curframe; i++)
		clean_func_state(env, st->frame[i]);
}

/* the parentage chains form a tree.
 * the verifier states are added to state lists at given insn and
 * pushed into state stack for future exploration.
 * when the verifier reaches bpf_exit insn some of the verifier states
 * stored in the state lists have their final liveness state already,
 * but a lot of states will get revised from liveness point of view when
 * the verifier explores other branches.
 * Example:
 * 1: r0 = 1
 * 2: if r1 == 100 goto pc+1
 * 3: r0 = 2
 * 4: exit
 * when the verifier reaches exit insn the register r0 in the state list of
 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
 * of insn 2 and goes exploring further. At the insn 4 it will walk the
 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
 *
 * Since the verifier pushes the branch states as it sees them while exploring
 * the program the condition of walking the branch instruction for the second
 * time means that all states below this branch were already explored and
 * their final liveness marks are already propagated.
 * Hence when the verifier completes the search of state list in is_state_visited()
 * we can call this clean_live_states() function to mark all liveness states
 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
 * will not be used.
 * This function also clears the registers and stack for states that !READ
 * to simplify state merging.
 *
 * Important note here that walking the same branch instruction in the callee
 * doesn't mean that the states are DONE. The verifier has to compare
 * the callsites
 */
static void clean_live_states(struct bpf_verifier_env *env, int insn,
			      struct bpf_verifier_state *cur)
{
	struct bpf_verifier_state_list *sl;
	int i;

	sl = *explored_state(env, insn);
	while (sl) {
		if (sl->state.branches)
			goto next;
		if (sl->state.insn_idx != insn ||
		    sl->state.curframe != cur->curframe)
			goto next;
		for (i = 0; i <= cur->curframe; i++)
			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
				goto next;
		clean_verifier_state(env, &sl->state);
next:
		sl = sl->next;
	}
}

/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
		    struct idpair *idmap)
{
	bool equal;

	if (!(rold->live & REG_LIVE_READ))
		/* explored state didn't use this */
		return true;

	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;

	if (rold->type == PTR_TO_STACK)
		/* two stack pointers are equal only if they're pointing to
		 * the same stack frame, since fp-8 in foo != fp-8 in bar
		 */
		return equal && rold->frameno == rcur->frameno;

	if (equal)
		return true;

	if (rold->type == NOT_INIT)
		/* explored state can't have used this */
		return true;
	if (rcur->type == NOT_INIT)
		return false;
	switch (rold->type) {
	case SCALAR_VALUE:
		if (rcur->type == SCALAR_VALUE) {
			if (!rold->precise && !rcur->precise)
				return true;
			/* new val must satisfy old val knowledge */
			return range_within(rold, rcur) &&
			       tnum_in(rold->var_off, rcur->var_off);
		} else {
			/* We're trying to use a pointer in place of a scalar.
			 * Even if the scalar was unbounded, this could lead to
			 * pointer leaks because scalars are allowed to leak
			 * while pointers are not. We could make this safe in
			 * special cases if root is calling us, but it's
			 * probably not worth the hassle.
			 */
			return false;
		}
	case PTR_TO_MAP_VALUE:
		/* If the new min/max/var_off satisfy the old ones and
		 * everything else matches, we are OK.
		 * 'id' is not compared, since it's only used for maps with
		 * bpf_spin_lock inside map element and in such cases if
		 * the rest of the prog is valid for one map element then
		 * it's valid for all map elements regardless of the key
		 * used in bpf_map_lookup()
		 */
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
		       range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_MAP_VALUE_OR_NULL:
		/* a PTR_TO_MAP_VALUE could be safe to use as a
		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
		 * checked, doing so could have affected others with the same
		 * id, and we can't check for that because we lost the id when
		 * we converted to a PTR_TO_MAP_VALUE.
		 */
		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
			return false;
		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
			return false;
		/* Check our ids match any regs they're supposed to */
		return check_ids(rold->id, rcur->id, idmap);
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
		if (rcur->type != rold->type)
			return false;
		/* We must have at least as much range as the old ptr
		 * did, so that any accesses which were safe before are
		 * still safe.  This is true even if old range < old off,
		 * since someone could have accessed through (ptr - k), or
		 * even done ptr -= k in a register, to get a safe access.
		 */
		if (rold->range > rcur->range)
			return false;
		/* If the offsets don't match, we can't trust our alignment;
		 * nor can we be sure that we won't fall out of range.
		 */
		if (rold->off != rcur->off)
			return false;
		/* id relations must be preserved */
		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
			return false;
		/* new val must satisfy old val knowledge */
		return range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_CTX:
	case CONST_PTR_TO_MAP:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
	case PTR_TO_XDP_SOCK:
		/* Only valid matches are exact, which memcmp() above
		 * would have accepted
		 */
	default:
		/* Don't know what's going on, just say it's not safe */
		return false;
	}

	/* Shouldn't get here; if we do, say it's not safe */
	WARN_ON_ONCE(1);
	return false;
}

static bool stacksafe(struct bpf_func_state *old,
		      struct bpf_func_state *cur,
		      struct idpair *idmap)
{
	int i, spi;

	/* walk slots of the explored stack and ignore any additional
	 * slots in the current stack, since explored(safe) state
	 * didn't use them
	 */
	for (i = 0; i < old->allocated_stack; i++) {
		spi = i / BPF_REG_SIZE;

		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
			i += BPF_REG_SIZE - 1;
			/* explored state didn't use this */
			continue;
		}

		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
			continue;

		/* explored stack has more populated slots than current stack
		 * and these slots were used
		 */
		if (i >= cur->allocated_stack)
			return false;

		/* if old state was safe with misc data in the stack
		 * it will be safe with zero-initialized stack.
		 * The opposite is not true
		 */
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
			continue;
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			/* Ex: old explored (safe) state has STACK_SPILL in
			 * this stack slot, but current has STACK_MISC ->
			 * these verifier states are not equivalent,
			 * return false to continue verification of this path
			 */
			return false;
		if (i % BPF_REG_SIZE)
			continue;
		if (old->stack[spi].slot_type[0] != STACK_SPILL)
			continue;
		if (!regsafe(&old->stack[spi].spilled_ptr,
			     &cur->stack[spi].spilled_ptr,
			     idmap))
			/* when explored and current stack slot are both storing
			 * spilled registers, check that the stored pointer types
			 * are the same as well.
			 * Ex: explored safe path could have stored
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
			 * but current path has stored:
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
			 * such verifier states are not equivalent.
			 * return false to continue verification of this path
			 */
			return false;
	}
	return true;
}
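
/* Illustrative example (hypothetical slots, not taken from the original
 * source): if the explored state had a stack slot marked STACK_MISC and the
 * current state has the same slot marked STACK_ZERO, the slot is accepted,
 * since zero-initialized bytes are a safe special case of "unknown bytes".
 * The reverse (old STACK_ZERO, cur STACK_MISC) fails the slot_type
 * comparison and makes stacksafe() return false.
 */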

static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
{
	if (old->acquired_refs != cur->acquired_refs)
		return false;
	return !memcmp(old->refs, cur->refs,
		       sizeof(*old->refs) * old->acquired_refs);
}

/* compare two verifier states
 *
 * all states stored in state_list are known to be valid, since
 * verifier reached 'bpf_exit' instruction through them
 *
 * this function is called when verifier exploring different branches of
 * execution popped from the state stack. If it sees an old state that has
 * more strict register state and more strict stack state then this execution
 * branch doesn't need to be explored further, since verifier already
 * concluded that more strict state leads to valid finish.
 *
 * Therefore two states are equivalent if register state is more conservative
 * and explored stack state is more conservative than the current one.
 * Example:
 *       explored                   current
 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
 *
 * In other words if current stack state (one being explored) has more
 * valid slots than old one that already passed validation, it means
 * the verifier can stop exploring and conclude that current state is valid too
 *
 * Similarly with registers. If explored state has register type as invalid
 * whereas register type in current state is meaningful, it means that
 * the current state will reach 'bpf_exit' instruction safely
 */
static bool func_states_equal(struct bpf_func_state *old,
			      struct bpf_func_state *cur)
{
	struct idpair *idmap;
	bool ret = false;
	int i;

	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
	/* If we failed to allocate the idmap, just say it's not safe */
	if (!idmap)
		return false;

	for (i = 0; i < MAX_BPF_REG; i++) {
		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
			goto out_free;
	}

	if (!stacksafe(old, cur, idmap))
		goto out_free;

	if (!refsafe(old, cur))
		goto out_free;
	ret = true;
out_free:
	kfree(idmap);
	return ret;
}

static bool states_equal(struct bpf_verifier_env *env,
			 struct bpf_verifier_state *old,
			 struct bpf_verifier_state *cur)
{
	int i;

	if (old->curframe != cur->curframe)
		return false;

	/* Verification state from speculative execution simulation
	 * must never prune a non-speculative execution one.
	 */
	if (old->speculative && !cur->speculative)
		return false;

	if (old->active_spin_lock != cur->active_spin_lock)
		return false;

	/* for states to be equal callsites have to be the same
	 * and all frame states need to be equivalent
	 */
	for (i = 0; i <= old->curframe; i++) {
		if (old->frame[i]->callsite != cur->frame[i]->callsite)
			return false;
		if (!func_states_equal(old->frame[i], cur->frame[i]))
			return false;
	}
	return true;
}

/* Return 0 if no propagation happened. Return negative error code if error
 * happened. Otherwise, return the propagated bit.
 */
static int propagate_liveness_reg(struct bpf_verifier_env *env,
				  struct bpf_reg_state *reg,
				  struct bpf_reg_state *parent_reg)
{
	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
	u8 flag = reg->live & REG_LIVE_READ;
	int err;

	/* When we get here, the read flags of PARENT_REG or REG could be any
	 * of REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
	 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
	 */
	if (parent_flag == REG_LIVE_READ64 ||
	    /* Or if there is no read flag from REG. */
	    !flag ||
	    /* Or if the read flag from REG is the same as PARENT_REG. */
	    parent_flag == flag)
		return 0;

	err = mark_reg_read(env, reg, parent_reg, flag);
	if (err)
		return err;

	return flag;
}
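
/* Illustrative example (hypothetical flags, not taken from the original
 * source): if the parent register already carries REG_LIVE_READ64, nothing
 * is propagated and 0 is returned.  If the child was read as a full 64-bit
 * value (REG_LIVE_READ64) while the parent only has REG_LIVE_READ32, the
 * stronger flag is propagated via mark_reg_read() and REG_LIVE_READ64 is
 * returned, which lets propagate_liveness() call mark_insn_zext() on the
 * parent register.
 */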

/* A write screens off any subsequent reads; but write marks come from the
 * straight-line code between a state and its parent.  When we arrive at an
 * equivalent state (jump target or such) we didn't arrive by the straight-line
 * code, so read marks in the state must propagate to the parent regardless
 * of the state's write marks. That's what 'parent == state->parent' comparison
 * in mark_reg_read() is for.
 */
static int propagate_liveness(struct bpf_verifier_env *env,
			      const struct bpf_verifier_state *vstate,
			      struct bpf_verifier_state *vparent)
{
	struct bpf_reg_state *state_reg, *parent_reg;
	struct bpf_func_state *state, *parent;
	int i, frame, err = 0;

	if (vparent->curframe != vstate->curframe) {
		WARN(1, "propagate_live: parent frame %d current frame %d\n",
		     vparent->curframe, vstate->curframe);
		return -EFAULT;
	}
	/* Propagate read liveness of registers... */
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
	for (frame = 0; frame <= vstate->curframe; frame++) {
		parent = vparent->frame[frame];
		state = vstate->frame[frame];
		parent_reg = parent->regs;
		state_reg = state->regs;
		/* We don't need to worry about FP liveness, it's read-only */
		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
			err = propagate_liveness_reg(env, &state_reg[i],
						     &parent_reg[i]);
			if (err < 0)
				return err;
			if (err == REG_LIVE_READ64)
				mark_insn_zext(env, &parent_reg[i]);
		}

		/* Propagate stack slots. */
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
			parent_reg = &parent->stack[i].spilled_ptr;
			state_reg = &state->stack[i].spilled_ptr;
			err = propagate_liveness_reg(env, state_reg,
						     parent_reg);
			if (err < 0)
				return err;
		}
	}
	return 0;
}

/* find precise scalars in the previous equivalent state and
 * propagate them into the current state
 */
static int propagate_precision(struct bpf_verifier_env *env,
			       const struct bpf_verifier_state *old)
{
	struct bpf_reg_state *state_reg;
	struct bpf_func_state *state;
	int i, err = 0;

	state = old->frame[old->curframe];
	state_reg = state->regs;
	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
		if (state_reg->type != SCALAR_VALUE ||
		    !state_reg->precise)
			continue;
		if (env->log.level & BPF_LOG_LEVEL2)
			verbose(env, "propagating r%d\n", i);
		err = mark_chain_precision(env, i);
		if (err < 0)
			return err;
	}

	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].slot_type[0] != STACK_SPILL)
			continue;
		state_reg = &state->stack[i].spilled_ptr;
		if (state_reg->type != SCALAR_VALUE ||
		    !state_reg->precise)
			continue;
		if (env->log.level & BPF_LOG_LEVEL2)
			verbose(env, "propagating fp%d\n",
				(-i - 1) * BPF_REG_SIZE);
		err = mark_chain_precision_stack(env, i);
		if (err < 0)
			return err;
	}
	return 0;
}

static bool states_maybe_looping(struct bpf_verifier_state *old,
				 struct bpf_verifier_state *cur)
{
	struct bpf_func_state *fold, *fcur;
	int i, fr = cur->curframe;

	if (old->curframe != fr)
		return false;

	fold = old->frame[fr];
	fcur = cur->frame[fr];
	for (i = 0; i < MAX_BPF_REG; i++)
		if (memcmp(&fold->regs[i], &fcur->regs[i],
			   offsetof(struct bpf_reg_state, parent)))
			return false;
	return true;
}
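
/* Illustrative example (hypothetical program, not taken from the original
 * source): a loop that makes no visible progress, e.g.
 *   1: r2 = r10
 *   2: goto pc-2
 * revisits insn 2 in the same frame with byte-identical register contents
 * (only the fields before the 'parent' member are compared), so this helper
 * returns true and is_state_visited() can report an infinite loop once
 * states_equal() also matches.
 */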


static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl, **pprev;
	struct bpf_verifier_state *cur = env->cur_state, *new;
	int i, j, err, states_cnt = 0;
	bool add_new_state = env->test_state_freq ? true : false;

	cur->last_insn_idx = env->prev_insn_idx;
	if (!env->insn_aux_data[insn_idx].prune_point)
		/* this 'insn_idx' instruction wasn't marked, so we will not
		 * be doing state search here
		 */
		return 0;

	/* bpf progs typically have pruning point every 4 instructions
	 * http://vger.kernel.org/bpfconf2019.html#session-1
	 * Do not add new state for future pruning if the verifier hasn't seen
	 * at least 2 jumps and at least 8 instructions.
	 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
	 * In tests that amounts to up to 50% reduction into total verifier
	 * memory consumption and 20% verifier time speedup.
	 */
	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
	    env->insn_processed - env->prev_insn_processed >= 8)
		add_new_state = true;

	pprev = explored_state(env, insn_idx);
	sl = *pprev;

	clean_live_states(env, insn_idx, cur);

	while (sl) {
		states_cnt++;
		if (sl->state.insn_idx != insn_idx)
			goto next;
		if (sl->state.branches) {
			if (states_maybe_looping(&sl->state, cur) &&
			    states_equal(env, &sl->state, cur)) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
				return -EINVAL;
			}
			/* if the verifier is processing a loop, avoid adding new state
			 * too often, since different loop iterations have distinct
			 * states and may not help future pruning.
			 * This threshold shouldn't be too low to make sure that
			 * a loop with large bound will be rejected quickly.
			 * The most abusive loop will be:
			 * r1 += 1
			 * if r1 < 1000000 goto pc-2
			 * 1M insn_processed limit / 100 == 10k peak states.
			 * This threshold shouldn't be too high either, since states
			 * at the end of the loop are likely to be useful in pruning.
			 */
			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
			    env->insn_processed - env->prev_insn_processed < 100)
				add_new_state = false;
			goto miss;
		}
		if (states_equal(env, &sl->state, cur)) {
			sl->hit_cnt++;
			/* reached equivalent register/stack state,
			 * prune the search.
			 * Registers read by the continuation are read by us.
			 * If we have any write marks in env->cur_state, they
			 * will prevent corresponding reads in the continuation
			 * from reaching our parent (an explored_state).  Our
			 * own state will get the read marks recorded, but
			 * they'll be immediately forgotten as we're pruning
			 * this state and will pop a new one.
			 */
			err = propagate_liveness(env, &sl->state, cur);

			/* if previous state reached the exit with precision and
			 * current state is equivalent to it (except precision marks)
			 * the precision needs to be propagated back in
			 * the current state.
			 */
			err = err ? : push_jmp_history(env, cur);
			err = err ? : propagate_precision(env, &sl->state);
			if (err)
				return err;
			return 1;
		}
miss:
		/* when new state is not going to be added do not increase miss count.
		 * Otherwise several loop iterations will remove the state
		 * recorded earlier. The goal of these heuristics is to have
		 * states from some iterations of the loop (some in the beginning
		 * and some at the end) to help pruning.
		 */
		if (add_new_state)
			sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 */
		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			*pprev = sl->next;
			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
				u32 br = sl->state.branches;

				WARN_ONCE(br,
					  "BUG live_done but branches_to_explore %d\n",
					  br);
				free_verifier_state(&sl->state, false);
				kfree(sl);
				env->peak_states--;
			} else {
				/* cannot free this state, since parentage chain may
				 * walk it later. Add it for free_list instead to
				 * be freed at the end of verification
				 */
				sl->next = env->free_list;
				env->free_list = sl;
			}
			sl = *pprev;
			continue;
		}
next:
		pprev = &sl->next;
		sl = *pprev;
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return push_jmp_history(env, cur);

	if (!add_new_state)
		return push_jmp_history(env, cur);

	/* There were no equivalent states, remember the current one.
	 * Technically the current state is not proven to be safe yet,
	 * but it will either reach outer most bpf_exit (which means it's safe)
	 * or it will be rejected. When there are no loops the verifier won't be
	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
	 * again on the way to bpf_exit.
	 * When looping the sl->state.branches will be > 0 and this state
	 * will not be considered for equivalence until branches == 0.
	 */
	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->peak_states++;
	env->prev_jmps_processed = env->jmps_processed;
	env->prev_insn_processed = env->insn_processed;

	/* add new state to the head of linked list */
	new = &new_sl->state;
	err = copy_verifier_state(new, cur);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}
	new->insn_idx = insn_idx;
	WARN_ONCE(new->branches != 1,
		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);

	cur->parent = new;
	cur->first_insn_idx = insn_idx;
	clear_jmp_history(cur);
	new_sl->next = *explored_state(env, insn_idx);
	*explored_state(env, insn_idx) = new_sl;
	/* connect new state to parentage chain. Current frame needs all
	 * registers connected. Only r6 - r9 of the callers are alive (pushed
	 * to the stack implicitly by JITs) so in callers' frames connect just
	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
	 * the state of the call instruction (with WRITTEN set), and r0 comes
	 * from callee with its full parentage chain, anyway.
	 */
	/* clear write marks in current state: the writes we did are not writes
	 * our child did, so they don't screen off its reads from us.
	 * (There are no read marks in current state, because reads always mark
	 * their parent and current state never has children yet.  Only
	 * explored_states can get read marks.)
	 */
	for (j = 0; j <= cur->curframe; j++) {
		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
		for (i = 0; i < BPF_REG_FP; i++)
			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
	}

	/* all stack frames are accessible from callee, clear them all */
	for (j = 0; j <= cur->curframe; j++) {
		struct bpf_func_state *frame = cur->frame[j];
		struct bpf_func_state *newframe = new->frame[j];

		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
			frame->stack[i].spilled_ptr.parent =
						&newframe->stack[i].spilled_ptr;
		}
	}
	return 0;
}

/* Return true if it's OK to have the same insn return a different type. */
static bool reg_type_mismatch_ok(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_CTX:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_SOCK_COMMON_OR_NULL:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_TCP_SOCK_OR_NULL:
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID_OR_NULL:
		return false;
	default:
		return true;
	}
}

/* If an instruction was previously used with particular pointer types, then we
 * need to be careful to avoid cases such as the below, where it may be ok
 * for one branch accessing the pointer, but not ok for the other branch:
 *
 * R1 = sock_ptr
 * goto X;
 * ...
 * R1 = some_other_valid_ptr;
 * goto X;
 * ...
 * R2 = *(u32 *)(R1 + 0);
 */
static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
{
	return src != prev && (!reg_type_mismatch_ok(src) ||
			       !reg_type_mismatch_ok(prev));
}

static int do_check(struct bpf_verifier_env *env)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_reg_state *regs;
	int insn_cnt = env->prog->len;
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		u8 class;
		int err;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		class = BPF_CLASS(insn->code);

		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		err = is_state_visited(env, env->insn_idx);
		if (err < 0)
			return err;
		if (err == 1) {
			/* found equivalent state, can prune the search */
			if (env->log.level & BPF_LOG_LEVEL) {
				if (do_print_state)
					verbose(env, "\nfrom %d to %d%s: safe\n",
						env->prev_insn_idx, env->insn_idx,
						env->cur_state->speculative ?
						" (speculative execution)" : "");
				else
					verbose(env, "%d: safe\n", env->insn_idx);
			}
			goto process_bpf_exit;
		}

		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		if (env->log.level & BPF_LOG_LEVEL2 ||
		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "%d:", env->insn_idx);
			else
				verbose(env, "\nfrom %d to %d%s:",
					env->prev_insn_idx, env->insn_idx,
					env->cur_state->speculative ?
					" (speculative execution)" : "");
			print_verifier_state(env, state->frame[state->curframe]);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			const struct bpf_insn_cbs cbs = {
				.cb_print	= verbose,
				.private_data	= env,
			};

			verbose_linfo(env, env->insn_idx, "; ");
			verbose(env, "%d: ", env->insn_idx);
			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
		}

		if (bpf_prog_is_dev_bound(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		regs = cur_regs(env);
		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
		prev_insn_idx = env->insn_idx;

		if (class == BPF_ALU || class == BPF_ALU64) {
			err = check_alu_op(env, insn);
			if (err)
				return err;

		} else if (class == BPF_LDX) {
			enum bpf_reg_type *prev_src_type, src_reg_type;

			/* check for reserved fields is already done */

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;

			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
			if (err)
				return err;

			src_reg_type = regs[insn->src_reg].type;

			/* check that memory (src_reg + off) is readable,
			 * the state of dst_reg will be updated by this func
			 */
			err = check_mem_access(env, env->insn_idx, insn->src_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_READ, insn->dst_reg, false);
			if (err)
				return err;

			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;

			if (*prev_src_type == NOT_INIT) {
				/* saw a valid insn
				 * dst_reg = *(u32 *)(src_reg + off)
				 * save type to validate intersecting paths
				 */
				*prev_src_type = src_reg_type;

			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
				/* A user program is trying to use the same insn
				 * dst_reg = *(u32*) (src_reg + off)
				 * with different pointer types:
				 * src_reg == ctx in one branch and
				 * src_reg == stack|map in some other branch.
				 * Reject it.
				 */
				verbose(env, "same insn cannot be used with different pointers\n");
				return -EINVAL;
			}

		} else if (class == BPF_STX) {
			enum bpf_reg_type *prev_dst_type, dst_reg_type;

			if (BPF_MODE(insn->code) == BPF_XADD) {
				err = check_xadd(env, env->insn_idx, insn);
				if (err)
					return err;
				env->insn_idx++;
				continue;
			}

			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
			/* check src2 operand */
			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
			if (err)
				return err;

			dst_reg_type = regs[insn->dst_reg].type;

			/* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, insn->src_reg, false);
			if (err)
				return err;

			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;

			if (*prev_dst_type == NOT_INIT) {
				*prev_dst_type = dst_reg_type;
			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
				verbose(env, "same insn cannot be used with different pointers\n");
				return -EINVAL;
			}

		} else if (class == BPF_ST) {
			if (BPF_MODE(insn->code) != BPF_MEM ||
			    insn->src_reg != BPF_REG_0) {
				verbose(env, "BPF_ST uses reserved fields\n");
				return -EINVAL;
			}
			/* check src operand */
			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
			if (err)
				return err;

			if (is_ctx_reg(env, insn->dst_reg)) {
				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
					insn->dst_reg,
					reg_type_str[reg_state(env, insn->dst_reg)->type]);
				return -EACCES;
			}

			/* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, -1, false);
			if (err)
				return err;

		} else if (class == BPF_JMP || class == BPF_JMP32) {
			u8 opcode = BPF_OP(insn->code);

			env->jmps_processed++;
			if (opcode == BPF_CALL) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->off != 0 ||
				    (insn->src_reg != BPF_REG_0 &&
				     insn->src_reg != BPF_PSEUDO_CALL) ||
				    insn->dst_reg != BPF_REG_0 ||
				    class == BPF_JMP32) {
					verbose(env, "BPF_CALL uses reserved fields\n");
					return -EINVAL;
				}

				if (env->cur_state->active_spin_lock &&
				    (insn->src_reg == BPF_PSEUDO_CALL ||
				     insn->imm != BPF_FUNC_spin_unlock)) {
					verbose(env, "function calls are not allowed while holding a lock\n");
					return -EINVAL;
				}
				if (insn->src_reg == BPF_PSEUDO_CALL)
					err = check_func_call(env, insn, &env->insn_idx);
				else
					err = check_helper_call(env, insn->imm, env->insn_idx);
				if (err)
					return err;

			} else if (opcode == BPF_JA) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->imm != 0 ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0 ||
				    class == BPF_JMP32) {
					verbose(env, "BPF_JA uses reserved fields\n");
					return -EINVAL;
				}

				env->insn_idx += insn->off + 1;
				continue;

			} else if (opcode == BPF_EXIT) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->imm != 0 ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0 ||
				    class == BPF_JMP32) {
					verbose(env, "BPF_EXIT uses reserved fields\n");
					return -EINVAL;
				}

				if (env->cur_state->active_spin_lock) {
					verbose(env, "bpf_spin_unlock is missing\n");
					return -EINVAL;
				}

				if (state->curframe) {
					/* exit from nested function */
					err = prepare_func_exit(env, &env->insn_idx);
					if (err)
						return err;
					do_print_state = true;
					continue;
				}

				err = check_reference_leak(env);
				if (err)
					return err;

				err = check_return_code(env);
				if (err)
					return err;
process_bpf_exit:
				update_branch_counts(env, env->cur_state);
				err = pop_stack(env, &prev_insn_idx,
						&env->insn_idx, pop_log);
				if (err < 0) {
					if (err != -ENOENT)
						return err;
					break;
				} else {
					do_print_state = true;
					continue;
				}
			} else {
				err = check_cond_jmp_op(env, insn, &env->insn_idx);
				if (err)
					return err;
			}
		} else if (class == BPF_LD) {
			u8 mode = BPF_MODE(insn->code);

			if (mode == BPF_ABS || mode == BPF_IND) {
				err = check_ld_abs(env, insn);
				if (err)
					return err;

			} else if (mode == BPF_IMM) {
				err = check_ld_imm(env, insn);
				if (err)
					return err;

				env->insn_idx++;
				env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
			} else {
				verbose(env, "invalid BPF_LD mode\n");
				return -EINVAL;
			}
		} else {
			verbose(env, "unknown insn class %d\n", class);
			return -EINVAL;
		}

		env->insn_idx++;
	}

	return 0;
}

/* replace pseudo btf_id with kernel symbol address */
static int check_pseudo_btf_id(struct bpf_verifier_env *env,
			       struct bpf_insn *insn,
			       struct bpf_insn_aux_data *aux)
{
	const struct btf_var_secinfo *vsi;
	const struct btf_type *datasec;
	const struct btf_type *t;
	const char *sym_name;
	bool percpu = false;
	u32 type, id = insn->imm;
	s32 datasec_id;
	u64 addr;
	int i;

	if (!btf_vmlinux) {
		verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
		return -EINVAL;
	}

	if (insn[1].imm != 0) {
		verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
		return -EINVAL;
	}

	t = btf_type_by_id(btf_vmlinux, id);
	if (!t) {
		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
		return -ENOENT;
	}

	if (!btf_type_is_var(t)) {
		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
			id);
		return -EINVAL;
	}

	sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
	addr = kallsyms_lookup_name(sym_name);
	if (!addr) {
		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
			sym_name);
		return -ENOENT;
	}

	datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
					   BTF_KIND_DATASEC);
	if (datasec_id > 0) {
		datasec = btf_type_by_id(btf_vmlinux, datasec_id);
		for_each_vsi(i, datasec, vsi) {
			if (vsi->type == id) {
				percpu = true;
				break;
			}
		}
	}

	insn[0].imm = (u32)addr;
	insn[1].imm = addr >> 32;

	type = t->type;
	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
	if (percpu) {
		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
		aux->btf_var.btf_id = type;
	} else if (!btf_type_is_struct(t)) {
		const struct btf_type *ret;
		const char *tname;
		u32 tsize;

		/* resolve the type size of ksym. */
		ret = btf_resolve_size(btf_vmlinux, t, &tsize);
		if (IS_ERR(ret)) {
			tname = btf_name_by_offset(btf_vmlinux, t->name_off);
			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
				tname, PTR_ERR(ret));
			return -EINVAL;
		}
		aux->btf_var.reg_type = PTR_TO_MEM;
		aux->btf_var.mem_size = tsize;
	} else {
		aux->btf_var.reg_type = PTR_TO_BTF_ID;
		aux->btf_var.btf_id = type;
	}
	return 0;
}

9668 9669 9670
static int check_map_prealloc(struct bpf_map *map)
{
	return (map->map_type != BPF_MAP_TYPE_HASH &&
M
Martin KaFai Lau 已提交
9671 9672
		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
9673 9674 9675
		!(map->map_flags & BPF_F_NO_PREALLOC);
}

9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688
static bool is_tracing_prog_type(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
		return true;
	default:
		return false;
	}
}

9689 9690 9691 9692 9693 9694 9695 9696 9697
static bool is_preallocated_map(struct bpf_map *map)
{
	if (!check_map_prealloc(map))
		return false;
	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
		return false;
	return true;
}

9698 9699
static int check_map_prog_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map,
9700 9701 9702
					struct bpf_prog *prog)

{
9703
	enum bpf_prog_type prog_type = resolve_prog_type(prog);
9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714
	/*
	 * Validate that trace type programs use preallocated hash maps.
	 *
	 * For programs attached to PERF events this is mandatory as the
	 * perf NMI can hit any arbitrary code sequence.
	 *
	 * All other trace types using preallocated hash maps are unsafe as
	 * well because tracepoint or kprobes can be inside locked regions
	 * of the memory allocator or at a place where a recursion into the
	 * memory allocator would see inconsistent state.
	 *
9715 9716 9717 9718 9719
	 * On RT enabled kernels run-time allocation of all trace type
	 * programs is strictly prohibited due to lock type constraints. On
	 * !RT kernels it is allowed for backwards compatibility reasons for
	 * now, but warnings are emitted so developers are made aware of
	 * the unsafety and can fix their programs before this is enforced.
9720
	 */
9721 9722
	if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
		if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
9723
			verbose(env, "perf_event programs can only use preallocated hash map\n");
9724 9725
			return -EINVAL;
		}
9726 9727 9728 9729
		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
			verbose(env, "trace type programs can only use preallocated hash map\n");
			return -EINVAL;
		}
9730 9731
		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
9732
	}
9733

9734 9735
	if ((is_tracing_prog_type(prog_type) ||
	     prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
9736 9737 9738 9739 9740
	    map_value_has_spin_lock(map)) {
		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
		return -EINVAL;
	}

9741
	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
9742
	    !bpf_offload_prog_map_match(prog, map)) {
9743 9744 9745 9746
		verbose(env, "offload device mismatch between prog and map\n");
		return -EINVAL;
	}

9747 9748 9749 9750 9751
	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
		return -EINVAL;
	}

9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768
	if (prog->aux->sleepable)
		switch (map->map_type) {
		case BPF_MAP_TYPE_HASH:
		case BPF_MAP_TYPE_LRU_HASH:
		case BPF_MAP_TYPE_ARRAY:
			if (!is_preallocated_map(map)) {
				verbose(env,
					"Sleepable programs can only use preallocated hash maps\n");
				return -EINVAL;
			}
			break;
		default:
			verbose(env,
				"Sleepable programs can only use array and hash maps\n");
			return -EINVAL;
		}

	return 0;
}

static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
{
	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
}

/* find and rewrite pseudo imm in ld_imm64 instructions:
 *
 * 1. if it accesses map FD, replace it with actual map pointer.
 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
 *
 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
 */
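/* Example encoding (a sketch of the user-space side): a map reference is a
 * two-insn ld_imm64 with src_reg = BPF_PSEUDO_MAP_FD and the map file
 * descriptor in insn[0].imm, e.g.
 *
 *	BPF_LD_MAP_FD(BPF_REG_1, map_fd)
 *
 * After this pass insn[0].imm/insn[1].imm hold the low/high 32 bits of the
 * in-kernel 'struct bpf_map *' (or of a direct value address when
 * src_reg == BPF_PSEUDO_MAP_VALUE).
 */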
static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, j, err;

	err = bpf_prog_calc_tag(env->prog);
	if (err)
		return err;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (BPF_CLASS(insn->code) == BPF_LDX &&
		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
			verbose(env, "BPF_LDX uses reserved fields\n");
			return -EINVAL;
		}

		if (BPF_CLASS(insn->code) == BPF_STX &&
		    ((BPF_MODE(insn->code) != BPF_MEM &&
		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
			verbose(env, "BPF_STX uses reserved fields\n");
			return -EINVAL;
		}

		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
			struct bpf_insn_aux_data *aux;
			struct bpf_map *map;
			struct fd f;
			u64 addr;

			if (i == insn_cnt - 1 || insn[1].code != 0 ||
			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
			    insn[1].off != 0) {
				verbose(env, "invalid bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			if (insn[0].src_reg == 0)
				/* valid generic load 64-bit imm */
				goto next_insn;

			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
				aux = &env->insn_aux_data[i];
				err = check_pseudo_btf_id(env, insn, aux);
				if (err)
					return err;
				goto next_insn;
			}

			/* In final convert_pseudo_ld_imm64() step, this is
			 * converted into regular 64-bit imm load insn.
			 */
			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
			     insn[1].imm != 0)) {
				verbose(env,
					"unrecognized bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			f = fdget(insn[0].imm);
			map = __bpf_map_get(f);
			if (IS_ERR(map)) {
				verbose(env, "fd %d is not pointing to valid bpf_map\n",
					insn[0].imm);
				return PTR_ERR(map);
			}

			err = check_map_prog_compatibility(env, map, env->prog);
			if (err) {
				fdput(f);
				return err;
			}

			aux = &env->insn_aux_data[i];
			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
				addr = (unsigned long)map;
			} else {
				u32 off = insn[1].imm;

				if (off >= BPF_MAX_VAR_OFF) {
					verbose(env, "direct value offset of %u is not allowed\n", off);
					fdput(f);
					return -EINVAL;
				}

				if (!map->ops->map_direct_value_addr) {
					verbose(env, "no direct value access support for this map type\n");
					fdput(f);
					return -EINVAL;
				}

				err = map->ops->map_direct_value_addr(map, &addr, off);
				if (err) {
					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
						map->value_size, off);
					fdput(f);
					return err;
				}

				aux->map_off = off;
				addr += off;
			}

			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;

			/* check whether we recorded this map already */
			for (j = 0; j < env->used_map_cnt; j++) {
				if (env->used_maps[j] == map) {
					aux->map_index = j;
					fdput(f);
					goto next_insn;
				}
			}

			if (env->used_map_cnt >= MAX_USED_MAPS) {
				fdput(f);
				return -E2BIG;
			}

			/* hold the map. If the program is rejected by verifier,
			 * the map will be released by release_maps() or it
			 * will be used by the valid program until it's unloaded
			 * and all maps are released in free_used_maps()
			 */
			bpf_map_inc(map);

			aux->map_index = env->used_map_cnt;
			env->used_maps[env->used_map_cnt++] = map;

			if (bpf_map_is_cgroup_storage(map) &&
			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
				verbose(env, "only one cgroup storage of each type is allowed\n");
				fdput(f);
				return -EBUSY;
			}

			fdput(f);
next_insn:
			insn++;
			i++;
			continue;
		}

		/* Basic sanity check before we invest more work here. */
		if (!bpf_opcode_in_insntable(insn->code)) {
			verbose(env, "unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}
	}

	/* now all pseudo BPF_LD_IMM64 instructions load valid
	 * 'struct bpf_map *' into a register instead of user map_fd.
	 * These pointers will be used later by verifier to validate map access.
	 */
	return 0;
}

/* drop refcnt of maps used by the rejected program */
static void release_maps(struct bpf_verifier_env *env)
{
	__bpf_free_used_maps(env->prog->aux, env->used_maps,
			     env->used_map_cnt);
}

/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++)
		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
			insn->src_reg = 0;
}

/* single env->prog->insnsi[off] instruction was replaced with the range
 * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
 * [0, off) and [off, end) to new locations, so the patched range stays zero
 */
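/* For example (a sketch): with off == 3 and cnt == 3, aux data for insns
 * 0..2 is copied unchanged, the aux entry of the old insn 3 ends up at the
 * new index 5 (the last insn of the patch), and the new slots 3..4 start
 * out zeroed before their 'seen'/'zext_dst' fields are filled in below.
 */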
static int adjust_insn_aux_data(struct bpf_verifier_env *env,
				struct bpf_prog *new_prog, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
	struct bpf_insn *insn = new_prog->insnsi;
	u32 prog_len;
	int i;

	/* aux info at OFF always needs adjustment, no matter fast path
	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
	 * original insn at old prog.
	 */
	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);

	if (cnt == 1)
		return 0;
	prog_len = new_prog->len;
	new_data = vzalloc(array_size(prog_len,
				      sizeof(struct bpf_insn_aux_data)));
	if (!new_data)
		return -ENOMEM;
	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
	memcpy(new_data + off + cnt - 1, old_data + off,
	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
	for (i = off; i < off + cnt - 1; i++) {
		new_data[i].seen = env->pass_cnt;
		new_data[i].zext_dst = insn_has_def32(env, insn + i);
	}
	env->insn_aux_data = new_data;
	vfree(old_data);
	return 0;
}

static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	if (len == 1)
		return;
	/* NOTE: fake 'exit' subprog should be updated as well. */
	for (i = 0; i <= env->subprog_cnt; i++) {
		if (env->subprog_info[i].start <= off)
			continue;
		env->subprog_info[i].start += len - 1;
	}
}

static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
{
	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
	int i, sz = prog->aux->size_poke_tab;
	struct bpf_jit_poke_descriptor *desc;

	for (i = 0; i < sz; i++) {
		desc = &tab[i];
		desc->insn_idx += len - 1;
	}
}

static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
					    const struct bpf_insn *patch, u32 len)
{
	struct bpf_prog *new_prog;

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (IS_ERR(new_prog)) {
		if (PTR_ERR(new_prog) == -ERANGE)
			verbose(env,
				"insn %d cannot be patched due to 16-bit range\n",
				env->insn_aux_data[off].orig_idx);
		return NULL;
	}
	if (adjust_insn_aux_data(env, new_prog, off, len))
		return NULL;
	adjust_subprog_starts(env, off, len);
	adjust_poke_descs(new_prog, len);
	return new_prog;
}
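
/* Typical calling pattern in the rewrite passes below (a sketch):
 *
 *	new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 *	if (!new_prog)
 *		return -ENOMEM;
 *	delta    += cnt - 1;
 *	env->prog = prog = new_prog;
 *	insn      = new_prog->insnsi + i + delta;
 *
 * 'delta' accumulates how much the image has grown so that 'i' can keep
 * indexing the original instruction stream.
 */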

static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
					      u32 off, u32 cnt)
{
	int i, j;

	/* find first prog starting at or after off (first to remove) */
	for (i = 0; i < env->subprog_cnt; i++)
		if (env->subprog_info[i].start >= off)
			break;
	/* find first prog starting at or after off + cnt (first to stay) */
	for (j = i; j < env->subprog_cnt; j++)
		if (env->subprog_info[j].start >= off + cnt)
			break;
	/* if j doesn't start exactly at off + cnt, we are just removing
	 * the front of previous prog
	 */
	if (env->subprog_info[j].start != off + cnt)
		j--;

	if (j > i) {
		struct bpf_prog_aux *aux = env->prog->aux;
		int move;

		/* move fake 'exit' subprog as well */
		move = env->subprog_cnt + 1 - j;

		memmove(env->subprog_info + i,
			env->subprog_info + j,
			sizeof(*env->subprog_info) * move);
		env->subprog_cnt -= j - i;

		/* remove func_info */
		if (aux->func_info) {
			move = aux->func_info_cnt - j;

			memmove(aux->func_info + i,
				aux->func_info + j,
				sizeof(*aux->func_info) * move);
			aux->func_info_cnt -= j - i;
			/* func_info->insn_off is set after all code rewrites,
			 * in adjust_btf_func() - no need to adjust
			 */
		}
	} else {
		/* convert i from "first prog to remove" to "first to adjust" */
		if (env->subprog_info[i].start == off)
			i++;
	}

	/* update fake 'exit' subprog as well */
	for (; i <= env->subprog_cnt; i++)
		env->subprog_info[i].start -= cnt;

	return 0;
}

static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
				      u32 cnt)
{
	struct bpf_prog *prog = env->prog;
	u32 i, l_off, l_cnt, nr_linfo;
	struct bpf_line_info *linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo)
		return 0;

	linfo = prog->aux->linfo;

	/* find first line info to remove, count lines to be removed */
	for (i = 0; i < nr_linfo; i++)
		if (linfo[i].insn_off >= off)
			break;

	l_off = i;
	l_cnt = 0;
	for (; i < nr_linfo; i++)
		if (linfo[i].insn_off < off + cnt)
			l_cnt++;
		else
			break;

	/* If the first live insn doesn't match the first live linfo, it needs
	 * to "inherit" the last removed linfo.  prog is already modified, so
	 * prog->len == off means no live instructions after (tail of the
	 * program was removed).
	 */
	if (prog->len != off && l_cnt &&
	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
		l_cnt--;
		linfo[--i].insn_off = off + cnt;
	}

	/* remove the line info which refer to the removed instructions */
	if (l_cnt) {
		memmove(linfo + l_off, linfo + i,
			sizeof(*linfo) * (nr_linfo - i));

		prog->aux->nr_linfo -= l_cnt;
		nr_linfo = prog->aux->nr_linfo;
	}

	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
	for (i = l_off; i < nr_linfo; i++)
		linfo[i].insn_off -= cnt;

	/* fix up all subprogs (incl. 'exit') which start >= off */
	for (i = 0; i <= env->subprog_cnt; i++)
		if (env->subprog_info[i].linfo_idx > l_off) {
			/* program may have started in the removed region but
			 * may not be fully removed
			 */
			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
				env->subprog_info[i].linfo_idx -= l_cnt;
			else
				env->subprog_info[i].linfo_idx = l_off;
		}

	return 0;
}

static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	unsigned int orig_prog_len = env->prog->len;
	int err;

	if (bpf_prog_is_dev_bound(env->prog->aux))
		bpf_prog_offload_remove_insns(env, off, cnt);

	err = bpf_remove_insns(env->prog, off, cnt);
	if (err)
		return err;

	err = adjust_subprog_starts_after_remove(env, off, cnt);
	if (err)
		return err;

	err = bpf_adj_linfo_after_remove(env, off, cnt);
	if (err)
		return err;

	memmove(aux_data + off,	aux_data + off + cnt,
		sizeof(*aux_data) * (orig_prog_len - off - cnt));

	return 0;
}

/* The verifier does more data flow analysis than llvm and will not
 * explore branches that are dead at run time. Malicious programs can
 * have dead code too. Therefore replace all dead at-run-time code
 * with 'ja -1'.
 *
 * Plain nops would not be optimal: if they sat at the end of the
 * program and, through another bug, we managed to jump there, we would
 * execute beyond program memory. Returning an exception code also
 * wouldn't work since we can have subprogs where the dead code could
 * be located.
 */
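/* Note: BPF_JMP_IMM(BPF_JA, 0, 0, -1) is a jump to itself, so if a dead
 * insn were ever reached despite the analysis above, execution would spin
 * in place instead of running off the end of the program.
 */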
static void sanitize_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++) {
		if (aux_data[i].seen)
			continue;
		memcpy(insn + i, &trap, sizeof(trap));
	}
}

static bool insn_is_cond_jump(u8 code)
{
	u8 op;

	if (BPF_CLASS(code) == BPF_JMP32)
		return true;

	if (BPF_CLASS(code) != BPF_JMP)
		return false;

	op = BPF_OP(code);
	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
}
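
/* opt_hard_wire_dead_code_branches() below turns conditional jumps with a
 * dead arm into unconditional ones: if the fall-through insn was never
 * seen, the jump becomes 'ja insn->off' (branch always taken); if the
 * branch target was never seen, it becomes 'ja 0' (always fall through).
 */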

static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!insn_is_cond_jump(insn->code))
			continue;

		if (!aux_data[i + 1].seen)
			ja.off = insn->off;
		else if (!aux_data[i + 1 + insn->off].seen)
			ja.off = 0;
		else
			continue;

		if (bpf_prog_is_dev_bound(env->prog->aux))
			bpf_prog_offload_replace_insn(env, i, &ja);

		memcpy(insn, &ja, sizeof(ja));
	}
}

static int opt_remove_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	int insn_cnt = env->prog->len;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		int j;

		j = 0;
		while (i + j < insn_cnt && !aux_data[i + j].seen)
			j++;
		if (!j)
			continue;

		err = verifier_remove_insns(env, i, j);
		if (err)
			return err;
		insn_cnt = env->prog->len;
	}

	return 0;
}

static int opt_remove_nops(struct bpf_verifier_env *env)
{
	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		if (memcmp(&insn[i], &ja, sizeof(ja)))
			continue;

		err = verifier_remove_insns(env, i, 1);
		if (err)
			return err;
		insn_cnt--;
		i--;
	}

	return 0;
}
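
/* opt_subreg_zext_lo32_rnd_hi32() below inserts an explicit zero-extension
 * (BPF_ZEXT_REG) after 32-bit definitions when the JIT reports via
 * bpf_jit_needs_zext() that it does not zero the upper half itself.  With
 * BPF_F_TEST_RND_HI32 (test builds), insns that don't need the zext instead
 * get the upper 32 bits of their destination filled with a random value to
 * shake out programs that wrongly rely on those bits being zero.
 */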

static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
					 const union bpf_attr *attr)
{
	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	int i, patch_len, delta = 0, len = env->prog->len;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_prog *new_prog;
	bool rnd_hi32;

	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
	zext_patch[1] = BPF_ZEXT_REG(0);
	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
	for (i = 0; i < len; i++) {
		int adj_idx = i + delta;
		struct bpf_insn insn;

		insn = insns[adj_idx];
		if (!aux[adj_idx].zext_dst) {
			u8 code, class;
			u32 imm_rnd;

			if (!rnd_hi32)
				continue;

			code = insn.code;
			class = BPF_CLASS(code);
			if (insn_no_def(&insn))
				continue;

			/* NOTE: arg "reg" (the fourth one) is only used for
			 *       BPF_STX which has been ruled out in above
			 *       check, it is safe to pass NULL here.
			 */
			if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
				if (class == BPF_LD &&
				    BPF_MODE(code) == BPF_IMM)
					i++;
				continue;
			}

			/* ctx load could be transformed into wider load. */
			if (class == BPF_LDX &&
			    aux[adj_idx].ptr_type == PTR_TO_CTX)
				continue;

			imm_rnd = get_random_int();
			rnd_hi32_patch[0] = insn;
			rnd_hi32_patch[1].imm = imm_rnd;
			rnd_hi32_patch[3].dst_reg = insn.dst_reg;
			patch = rnd_hi32_patch;
			patch_len = 4;
			goto apply_patch_buffer;
		}

		if (!bpf_jit_needs_zext())
			continue;

		zext_patch[0] = insn;
		zext_patch[1].dst_reg = insn.dst_reg;
		zext_patch[1].src_reg = insn.dst_reg;
		patch = zext_patch;
		patch_len = 2;
apply_patch_buffer:
		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
		if (!new_prog)
			return -ENOMEM;
		env->prog = new_prog;
		insns = new_prog->insnsi;
		aux = env->insn_aux_data;
		delta += patch_len - 1;
	}

	return 0;
}

/* convert load instructions that access fields of a context type into a
 * sequence of instructions that access fields of the underlying structure:
 *     struct __sk_buff    -> struct sk_buff
 *     struct bpf_sock_ops -> struct sock
 */
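/* For example (a sketch; the exact offsets depend on the program type): a
 * socket filter's load of __sk_buff->len
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len));
 *
 * is rewritten by that program type's convert_ctx_access() callback into
 * the equivalent load from the real struct sk_buff behind the context
 * pointer.
 */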
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
	const struct bpf_verifier_ops *ops = env->ops;
	int i, cnt, size, ctx_field_size, delta = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn insn_buf[16], *insn;
	u32 target_size, size_default, off;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;

	if (ops->gen_prologue || env->seen_direct_write) {
		if (!ops->gen_prologue) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		}
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= ARRAY_SIZE(insn_buf)) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;
		}
	}

	if (bpf_prog_is_dev_bound(env->prog->aux))
		return 0;

	insn = env->prog->insnsi + delta;

	for (i = 0; i < insn_cnt; i++, insn++) {
		bpf_convert_ctx_access_t convert_ctx_access;

		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
			type = BPF_READ;
		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
			type = BPF_WRITE;
		else
			continue;

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].sanitize_stack_off) {
			struct bpf_insn patch[] = {
				/* Sanitize suspicious stack slot with zero.
				 * There are no memory dependencies for this store,
				 * since it's only using frame pointer and immediate
				 * constant of zero
				 */
				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
					   env->insn_aux_data[i + delta].sanitize_stack_off,
					   0),
				/* the original STX instruction will immediately
				 * overwrite the same stack slot with appropriate value
				 */
				*insn,
			};

			cnt = ARRAY_SIZE(patch);
			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		switch (env->insn_aux_data[i + delta].ptr_type) {
		case PTR_TO_CTX:
			if (!ops->convert_ctx_access)
				continue;
			convert_ctx_access = ops->convert_ctx_access;
			break;
		case PTR_TO_SOCKET:
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		case PTR_TO_XDP_SOCK:
			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
			break;
		case PTR_TO_BTF_ID:
			if (type == BPF_READ) {
				insn->code = BPF_LDX | BPF_PROBE_MEM |
					BPF_SIZE((insn)->code);
				env->prog->aux->num_exentries++;
			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
				verbose(env, "Writes through BTF pointers are not allowed\n");
				return -EINVAL;
			}
			continue;
		default:
			continue;
		}

		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimize program type specific
		 * convert_ctx_access changes. If conversion is successful,
		 * we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
		off = insn->off;
		if (is_narrower_load) {
			u8 size_code;

			if (type == BPF_WRITE) {
				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
				return -EINVAL;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			insn->off = off & ~(size_default - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
					 &target_size);
		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
		    (ctx_field_size && !target_size)) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		}

		if (is_narrower_load && size < target_size) {
			u8 shift = bpf_ctx_narrow_access_offset(
				off, size, size_default) * 8;
			if (ctx_field_size <= 4) {
				if (shift)
					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			} else {
				if (shift)
					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
								(1ULL << size * 8) - 1);
			}
		}

		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn      = new_prog->insnsi + i + delta;
	}

	return 0;
}
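
/* jit_subprogs() below splits a program containing bpf-to-bpf calls at
 * subprog boundaries, JITs each subprog as its own bpf_prog, and then
 * rewrites every BPF_PSEUDO_CALL imm to point at the callee's JITed image
 * relative to __bpf_call_base.  On failure the main program is restored so
 * that it can still run in the interpreter (except for -EFAULT, which is a
 * hard reject).
 */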

static int jit_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog, **func, *tmp;
	int i, j, subprog_start, subprog_end = 0, len, subprog;
	struct bpf_map *map_ptr;
	struct bpf_insn *insn;
	void *old_bpf_func;
	int err, num_exentries;

	if (env->subprog_cnt <= 1)
		return 0;

	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		/* Upon error here we cannot fall back to interpreter but
		 * need a hard reject of the program. Thus -EFAULT is
		 * propagated in any case.
		 */
		subprog = find_subprog(env, i + insn->imm + 1);
		if (subprog < 0) {
			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
				  i + insn->imm + 1);
			return -EFAULT;
		}
		/* temporarily remember subprog id inside insn instead of
		 * aux_data, since next loop will split up all insns into funcs
		 */
		insn->off = subprog;
		/* remember original imm in case JIT fails and fallback
		 * to interpreter will be needed
		 */
		env->insn_aux_data[i].call_imm = insn->imm;
		/* point imm to __bpf_call_base+1 from JITs point of view */
		insn->imm = 1;
	}

	err = bpf_prog_alloc_jited_linfo(prog);
	if (err)
		goto out_undo_insn;

	err = -ENOMEM;
	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
	if (!func)
		goto out_undo_insn;

	for (i = 0; i < env->subprog_cnt; i++) {
		subprog_start = subprog_end;
		subprog_end = env->subprog_info[i + 1].start;

		len = subprog_end - subprog_start;
		/* BPF_PROG_RUN doesn't call subprogs directly,
		 * hence main prog stats include the runtime of subprogs.
		 * subprogs don't have IDs and not reachable via prog_get_next_id
		 * func[i]->aux->stats will never be accessed and stays NULL
		 */
		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
		if (!func[i])
			goto out_free;
		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
		       len * sizeof(struct bpf_insn));
		func[i]->type = prog->type;
		func[i]->len = len;
		if (bpf_prog_calc_tag(func[i]))
			goto out_free;
		func[i]->is_func = 1;
		func[i]->aux->func_idx = i;
		/* the btf and func_info will be freed only at prog->aux */
		func[i]->aux->btf = prog->aux->btf;
		func[i]->aux->func_info = prog->aux->func_info;

		for (j = 0; j < prog->aux->size_poke_tab; j++) {
			u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
			int ret;

			if (!(insn_idx >= subprog_start &&
			      insn_idx <= subprog_end))
				continue;

			ret = bpf_jit_add_poke_descriptor(func[i],
							  &prog->aux->poke_tab[j]);
			if (ret < 0) {
				verbose(env, "adding tail call poke descriptor failed\n");
				goto out_free;
			}

			func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;

			map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
			ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
			if (ret < 0) {
				verbose(env, "tracking tail call prog failed\n");
				goto out_free;
			}
		}

		/* Use bpf_prog_F_tag to indicate functions in stack traces.
		 * Long term would need debug info to populate names
		 */
		func[i]->aux->name[0] = 'F';
		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
		func[i]->jit_requested = 1;
		func[i]->aux->linfo = prog->aux->linfo;
		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
		num_exentries = 0;
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (BPF_CLASS(insn->code) == BPF_LDX &&
			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
				num_exentries++;
		}
		func[i]->aux->num_exentries = num_exentries;
		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
		func[i] = bpf_int_jit_compile(func[i]);
		if (!func[i]->jited) {
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/* Untrack main program's aux structs so that during map_poke_run()
	 * we will not stumble upon the unfilled poke descriptors; each
	 * of the main program's poke descs got distributed across subprogs
	 * and got tracked onto map, so we are sure that none of them will
	 * be missed after the operation below
	 */
	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		map_ptr = prog->aux->poke_tab[i].tail_call.map;

		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
	}

	/* at this point all bpf functions were successfully JITed
	 * now populate all bpf_calls with correct addresses and
	 * run last pass of JIT
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (insn->code != (BPF_JMP | BPF_CALL) ||
			    insn->src_reg != BPF_PSEUDO_CALL)
				continue;
			subprog = insn->off;
			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
				    __bpf_call_base;
		}

		/* we use the aux data to keep a list of the start addresses
		 * of the JITed images for each function in the program
		 *
		 * for some architectures, such as powerpc64, the imm field
		 * might not be large enough to hold the offset of the start
		 * address of the callee's JITed image from __bpf_call_base
		 *
		 * in such cases, we can lookup the start address of a callee
		 * by using its subprog id, available from the off field of
		 * the call instruction, as an index for this list
		 */
		func[i]->aux->func = func;
		func[i]->aux->func_cnt = env->subprog_cnt;
	}
	for (i = 0; i < env->subprog_cnt; i++) {
		old_bpf_func = func[i]->bpf_func;
		tmp = bpf_int_jit_compile(func[i]);
		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/* finally lock prog and jit images for all functions and
	 * populate kallsysm
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		bpf_prog_lock_ro(func[i]);
		bpf_prog_kallsyms_add(func[i]);
	}

	/* Last step: make now unused interpreter insns from main
	 * prog consistent for later dump requests, so they can
	 * later look the same as if they were interpreted only.
	 */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		insn->off = env->insn_aux_data[i].call_imm;
		subprog = find_subprog(env, i + insn->off + 1);
		insn->imm = subprog;
	}

	prog->jited = 1;
	prog->bpf_func = func[0]->bpf_func;
	prog->aux->func = func;
	prog->aux->func_cnt = env->subprog_cnt;
	bpf_prog_free_unused_jited_linfo(prog);
	return 0;
out_free:
	for (i = 0; i < env->subprog_cnt; i++) {
		if (!func[i])
			continue;

		for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
			map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
			map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
		}
		bpf_jit_free(func[i]);
	}
	kfree(func);
out_undo_insn:
	/* cleanup main prog to be interpreted */
	prog->jit_requested = 0;
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		insn->off = 0;
		insn->imm = env->insn_aux_data[i].call_imm;
	}
	bpf_prog_free_jited_linfo(prog);
	return err;
}

static int fixup_call_args(struct bpf_verifier_env *env)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	struct bpf_prog *prog = env->prog;
	struct bpf_insn *insn = prog->insnsi;
	int i, depth;
#endif
	int err = 0;

	if (env->prog->jit_requested &&
	    !bpf_prog_is_dev_bound(env->prog->aux)) {
		err = jit_subprogs(env);
		if (err == 0)
			return 0;
		if (err == -EFAULT)
			return err;
	}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
		/* When JIT fails the progs with bpf2bpf calls and tail_calls
		 * have to be rejected, since interpreter doesn't support them yet.
		 */
		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
		return -EINVAL;
	}
	for (i = 0; i < prog->len; i++, insn++) {
		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		depth = get_callee_stack_depth(env, insn, i);
		if (depth < 0)
			return depth;
		bpf_patch_call_args(insn, depth);
	}
	err = 0;
#endif
	return err;
}

/* fixup insn->imm field of bpf_call instructions
 * and inline eligible helpers as explicit sequence of BPF instructions
 *
 * this function is called after eBPF program passed verification
 */
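/* For example (a sketch): a bpf_map_lookup_elem() call on a
 * BPF_MAP_TYPE_ARRAY map is replaced below with the short insn sequence
 * produced by the map's ->map_gen_lookup() callback, so the common lookup
 * path runs without a real helper call.
 */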
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	bool expect_blinding = bpf_jit_blinding_enabled(prog);
	struct bpf_insn *insn = prog->insnsi;
	const struct bpf_func_proto *fn;
	const int insn_cnt = prog->len;
	const struct bpf_map_ops *ops;
	struct bpf_insn_aux_data *aux;
	struct bpf_insn insn_buf[16];
	struct bpf_prog *new_prog;
	struct bpf_map *map_ptr;
	int i, ret, cnt, delta = 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
			struct bpf_insn mask_and_div[] = {
				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
				/* Rx div 0 -> 0 */
				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
				*insn,
			};
			struct bpf_insn mask_and_mod[] = {
				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
				/* Rx mod 0 -> Rx */
				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
				*insn,
			};
			struct bpf_insn *patchlet;

			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
				patchlet = mask_and_div + (is64 ? 1 : 0);
				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
			} else {
				patchlet = mask_and_mod + (is64 ? 1 : 0);
				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
			}

			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (BPF_CLASS(insn->code) == BPF_LD &&
		    (BPF_MODE(insn->code) == BPF_ABS ||
		     BPF_MODE(insn->code) == BPF_IND)) {
			cnt = env->ops->gen_ld_abs(insn, insn_buf);
			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
				verbose(env, "bpf verifier is misconfigured\n");
				return -EINVAL;
			}

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
			struct bpf_insn insn_buf[16];
			struct bpf_insn *patch = &insn_buf[0];
			bool issrc, isneg;
			u32 off_reg;

			aux = &env->insn_aux_data[i + delta];
			if (!aux->alu_state ||
			    aux->alu_state == BPF_ALU_NON_POINTER)
				continue;

			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
				BPF_ALU_SANITIZE_SRC;

			off_reg = issrc ? insn->src_reg : insn->dst_reg;
			if (isneg)
				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
			*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
			if (issrc) {
				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
							 off_reg);
				insn->src_reg = BPF_REG_AX;
			} else {
				*patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
							 BPF_REG_AX);
			}
			if (isneg)
				insn->code = insn->code == code_add ?
					     code_sub : code_add;
			*patch++ = *insn;
			if (issrc && isneg)
				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
			cnt = patch - insn_buf;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (insn->code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn->src_reg == BPF_PSEUDO_CALL)
			continue;

		if (insn->imm == BPF_FUNC_get_route_realm)
			prog->dst_needed = 1;
		if (insn->imm == BPF_FUNC_get_prandom_u32)
			bpf_user_rnd_init_once();
		if (insn->imm == BPF_FUNC_override_return)
			prog->kprobe_override = 1;
		if (insn->imm == BPF_FUNC_tail_call) {
			/* If we tail call into other programs, we
			 * cannot make any assumptions since they can
			 * be replaced dynamically during runtime in
			 * the program array.
			 */
			prog->cb_access = 1;
			if (!allow_tail_call_in_subprogs(env))
				prog->aux->stack_depth = MAX_BPF_STACK;
			prog->aux->max_pkt_offset = MAX_PACKET_OFF;

			/* mark bpf_tail_call as different opcode to avoid
			 * conditional branch in the interpreter for every normal
			 * call and to prevent accidental JITing by JIT compiler
			 * that doesn't support bpf_tail_call yet
			 */
			insn->imm = 0;
			insn->code = BPF_JMP | BPF_TAIL_CALL;

			aux = &env->insn_aux_data[i + delta];
			if (env->bpf_capable && !expect_blinding &&
			    prog->jit_requested &&
			    !bpf_map_key_poisoned(aux) &&
			    !bpf_map_ptr_poisoned(aux) &&
			    !bpf_map_ptr_unpriv(aux)) {
				struct bpf_jit_poke_descriptor desc = {
					.reason = BPF_POKE_REASON_TAIL_CALL,
					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
					.tail_call.key = bpf_map_key_immediate(aux),
					.insn_idx = i + delta,
				};

				ret = bpf_jit_add_poke_descriptor(prog, &desc);
				if (ret < 0) {
					verbose(env, "adding tail call poke descriptor failed\n");
					return ret;
				}

				insn->imm = ret + 1;
				continue;
			}

			if (!bpf_map_ptr_unpriv(aux))
				continue;

			/* instead of changing every JIT dealing with tail_call
			 * emit two extra insns:
			 * if (index >= max_entries) goto out;
			 * index &= array->index_mask;
			 * to avoid out-of-bounds cpu speculation
			 */
			if (bpf_map_ptr_poisoned(aux)) {
				verbose(env, "tail_call abusing map_ptr\n");
				return -EINVAL;
			}

			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
						  map_ptr->max_entries, 2);
			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
						    container_of(map_ptr,
								 struct bpf_array,
								 map)->index_mask);
			insn_buf[2] = *insn;
			cnt = 3;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
		 * and other inlining handlers are currently limited to 64 bit
		 * only.
		 */
		if (prog->jit_requested && BITS_PER_LONG == 64 &&
		    (insn->imm == BPF_FUNC_map_lookup_elem ||
		     insn->imm == BPF_FUNC_map_update_elem ||
		     insn->imm == BPF_FUNC_map_delete_elem ||
		     insn->imm == BPF_FUNC_map_push_elem   ||
		     insn->imm == BPF_FUNC_map_pop_elem    ||
		     insn->imm == BPF_FUNC_map_peek_elem)) {
			aux = &env->insn_aux_data[i + delta];
			if (bpf_map_ptr_poisoned(aux))
				goto patch_call_imm;

			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
			ops = map_ptr->ops;
			if (insn->imm == BPF_FUNC_map_lookup_elem &&
			    ops->map_gen_lookup) {
				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
				if (cnt == -EOPNOTSUPP)
					goto patch_map_ops_generic;
				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
					verbose(env, "bpf verifier is misconfigured\n");
					return -EINVAL;
				}

				new_prog = bpf_patch_insn_data(env, i + delta,
							       insn_buf, cnt);
				if (!new_prog)
					return -ENOMEM;

				delta    += cnt - 1;
				env->prog = prog = new_prog;
				insn      = new_prog->insnsi + i + delta;
				continue;
			}

			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
				     (void *(*)(struct bpf_map *map, void *key))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
				     (int (*)(struct bpf_map *map, void *key))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
				     (int (*)(struct bpf_map *map, void *key, void *value,
					      u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
				     (int (*)(struct bpf_map *map, void *value,
					      u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
				     (int (*)(struct bpf_map *map, void *value))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
				     (int (*)(struct bpf_map *map, void *value))NULL));
patch_map_ops_generic:
			switch (insn->imm) {
			case BPF_FUNC_map_lookup_elem:
				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_update_elem:
				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_delete_elem:
				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_push_elem:
				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_pop_elem:
				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_peek_elem:
				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
					    __bpf_call_base;
				continue;
			}

			goto patch_call_imm;
		}

		if (prog->jit_requested && BITS_PER_LONG == 64 &&
		    insn->imm == BPF_FUNC_jiffies64) {
			struct bpf_insn ld_jiffies_addr[2] = {
				BPF_LD_IMM64(BPF_REG_0,
					     (unsigned long)&jiffies),
			};

			insn_buf[0] = ld_jiffies_addr[0];
			insn_buf[1] = ld_jiffies_addr[1];
			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
						  BPF_REG_0, 0);
			cnt = 3;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
						       cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

patch_call_imm:
		fn = env->ops->get_func_proto(insn->imm, env->prog);
		/* all functions that have prototype and verifier allowed
		 * programs to call them, must be real in-kernel functions
		 */
		if (!fn->func) {
			verbose(env,
				"kernel subsystem misconfigured func %s#%d\n",
				func_id_name(insn->imm), insn->imm);
			return -EFAULT;
		}
		insn->imm = fn->func - __bpf_call_base;
	}

	/* Since poke tab is now finalized, publish aux to tracker. */
	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		map_ptr = prog->aux->poke_tab[i].tail_call.map;
		if (!map_ptr->ops->map_poke_track ||
		    !map_ptr->ops->map_poke_untrack ||
		    !map_ptr->ops->map_poke_run) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		}

		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
		if (ret < 0) {
			verbose(env, "tracking tail call prog failed\n");
			return ret;
		}
	}

	return 0;
}

static void free_states(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state_list *sl, *sln;
	int i;

	sl = env->free_list;
	while (sl) {
		sln = sl->next;
		free_verifier_state(&sl->state, false);
		kfree(sl);
		sl = sln;
	}
	env->free_list = NULL;

	if (!env->explored_states)
		return;

	for (i = 0; i < state_htab_size(env); i++) {
		sl = env->explored_states[i];

		while (sl) {
			sln = sl->next;
			free_verifier_state(&sl->state, false);
			kfree(sl);
			sl = sln;
		}
		env->explored_states[i] = NULL;
	}
}

/* The verifier is using insn_aux_data[] to store temporary data during
 * verification and to store information for passes that run after the
 * verification like dead code sanitization. do_check_common() for subprogram N
 * may analyze many other subprograms. sanitize_insn_aux_data() clears all
 * temporary data after do_check_common() finds that subprogram N cannot be
 * verified independently. pass_cnt counts the number of times
 * do_check_common() was run and insn->aux->seen tells the pass number
 * insn_aux_data was touched. These variables are compared to clear temporary
 * data from failed pass. For testing and experiments do_check_common() can be
 * run multiple times even when prior attempt to verify is unsuccessful.
 */
static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	struct bpf_insn_aux_data *aux;
	int i, class;

	for (i = 0; i < env->prog->len; i++) {
		class = BPF_CLASS(insn[i].code);
		if (class != BPF_LDX && class != BPF_STX)
			continue;
		aux = &env->insn_aux_data[i];
		if (aux->seen != env->pass_cnt)
			continue;
		memset(aux, 0, offsetof(typeof(*aux), orig_idx));
	}
}
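
/* do_check_common() below verifies one entry point: for the main program
 * (subprog 0) R1 is seeded with PTR_TO_CTX, while for a global function or
 * an EXT program the argument registers are derived from its BTF prototype;
 * do_check() then walks all paths until they reach BPF_EXIT.
 */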

static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state;
	struct bpf_reg_state *regs;
	int ret, i;

	env->prev_linfo = NULL;
	env->pass_cnt++;

	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;
	state->curframe = 0;
	state->speculative = false;
	state->branches = 1;
	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
	if (!state->frame[0]) {
		kfree(state);
		return -ENOMEM;
	}
	env->cur_state = state;
	init_func_state(env, state->frame[0],
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno */,
			subprog);

	regs = state->frame[state->curframe]->regs;
	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
		ret = btf_prepare_func_args(env, subprog, regs);
		if (ret)
			goto out;
		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
			if (regs[i].type == PTR_TO_CTX)
				mark_reg_known_zero(env, regs, i);
			else if (regs[i].type == SCALAR_VALUE)
				mark_reg_unknown(env, regs, i);
		}
	} else {
		/* 1st arg to a function */
		regs[BPF_REG_1].type = PTR_TO_CTX;
		mark_reg_known_zero(env, regs, BPF_REG_1);
		ret = btf_check_func_arg_match(env, subprog, regs);
		if (ret == -EFAULT)
			/* unlikely verifier bug. abort.
			 * ret == 0 and ret < 0 are sadly acceptable for
			 * main() function due to backward compatibility.
			 * Like socket filter program may be written as:
			 * int bpf_prog(struct pt_regs *ctx)
			 * and never dereference that ctx in the program.
			 * 'struct pt_regs' is a type mismatch for socket
			 * filter that should be using 'struct __sk_buff'.
			 */
			goto out;
	}

	ret = do_check(env);
out:
	/* check for NULL is necessary, since cur_state can be freed inside
	 * do_check() under memory pressure.
	 */
	if (env->cur_state) {
		free_verifier_state(env->cur_state, true);
		env->cur_state = NULL;
	}
	while (!pop_stack(env, NULL, NULL, false));
	if (!ret && pop_log)
		bpf_vlog_reset(&env->log, 0);
	free_states(env);
	if (ret)
		/* clean aux data in case subprog was rejected */
		sanitize_insn_aux_data(env);
	return ret;
}

/* Verify all global functions in a BPF program one by one based on their BTF.
 * All global functions must pass verification. Otherwise the whole program is rejected.
 * Consider:
 * int bar(int);
 * int foo(int f)
 * {
 *    return bar(f);
 * }
 * int bar(int b)
 * {
 *    ...
 * }
 * foo() will be verified first for R1=any_scalar_value. During verification it
 * will be assumed that bar() already verified successfully and call to bar()
 * from foo() will be checked for type match only. Later bar() will be verified
 * independently to check that it's safe for R1=any_scalar_value.
 */
static int do_check_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	int i, ret;

	if (!aux->func_info)
		return 0;

	for (i = 1; i < env->subprog_cnt; i++) {
		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
			continue;
		env->insn_idx = env->subprog_info[i].start;
		WARN_ON_ONCE(env->insn_idx == 0);
		ret = do_check_common(env, i);
		if (ret) {
			return ret;
		} else if (env->log.level & BPF_LOG_LEVEL) {
			verbose(env,
				"Func#%d is safe for any args that match its prototype\n",
				i);
		}
	}
	return 0;
}

static int do_check_main(struct bpf_verifier_env *env)
{
	int ret;

	env->insn_idx = 0;
	ret = do_check_common(env, 0);
	if (!ret)
		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
	return ret;
}


static void print_verification_stats(struct bpf_verifier_env *env)
{
	int i;

	if (env->log.level & BPF_LOG_STATS) {
		verbose(env, "verification time %lld usec\n",
			div_u64(env->verification_time, 1000));
		verbose(env, "stack depth ");
		for (i = 0; i < env->subprog_cnt; i++) {
			u32 depth = env->subprog_info[i].stack_depth;

			verbose(env, "%d", depth);
			if (i + 1 < env->subprog_cnt)
				verbose(env, "+");
		}
		verbose(env, "\n");
	}
	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
		"total_states %d peak_states %d mark_read %d\n",
		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
		env->max_states_per_insn, env->total_states,
		env->peak_states, env->longest_mark_read_walk);
}
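
/* check_struct_ops_btf_id() below handles BPF_PROG_TYPE_STRUCT_OPS:
 * attach_btf_id names the struct_ops type being implemented (for example
 * tcp_congestion_ops) and expected_attach_type is the index of the member
 * the program provides; the member's function prototype is recorded in
 * prog->aux->attach_func_proto for later signature checking.
 */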

static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
	const struct btf_type *t, *func_proto;
	const struct bpf_struct_ops *st_ops;
	const struct btf_member *member;
	struct bpf_prog *prog = env->prog;
	u32 btf_id, member_idx;
	const char *mname;

	btf_id = prog->aux->attach_btf_id;
	st_ops = bpf_struct_ops_find(btf_id);
	if (!st_ops) {
		verbose(env, "attach_btf_id %u is not a supported struct\n",
			btf_id);
		return -ENOTSUPP;
	}

	t = st_ops->type;
	member_idx = prog->expected_attach_type;
	if (member_idx >= btf_type_vlen(t)) {
		verbose(env, "attach to invalid member idx %u of struct %s\n",
			member_idx, st_ops->name);
		return -EINVAL;
	}

	member = &btf_type_member(t)[member_idx];
	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
					       NULL);
	if (!func_proto) {
		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
			mname, member_idx, st_ops->name);
		return -EINVAL;
	}

	if (st_ops->check_member) {
		int err = st_ops->check_member(t, member);

		if (err) {
			verbose(env, "attach to unsupported member %s of struct %s\n",
				mname, st_ops->name);
			return err;
		}
	}

	prog->aux->attach_func_proto = func_proto;
	prog->aux->attach_func_name = mname;
	env->ops = st_ops->verifier_ops;

	return 0;
}
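
/* Illustrative mapping for the lookup above (the concrete struct is only an
 * example): for a struct_ops program implementing a member of
 * tcp_congestion_ops, attach_btf_id names the tcp_congestion_ops BTF type
 * and expected_attach_type is reused as the index of the member being
 * implemented, so the third function pointer in the struct corresponds to
 * member_idx == 2. That member's FUNC_PROTO becomes attach_func_proto.
 */
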
#define SECURITY_PREFIX "security_"

static int check_attach_modify_return(unsigned long addr, const char *func_name)
{
	if (within_error_injection_list(addr) ||
	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
		return 0;

	return -EINVAL;
}
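
/* Sketch of the policy above (the hook named here is just an example):
 * a BPF_MODIFY_RETURN program may attach to a kernel function that is on
 * the error injection list or whose name starts with "security_", so an
 * LSM hook such as security_file_open() qualifies by prefix, while a
 * function that is neither error-injectable nor "security_"-prefixed is
 * rejected with -EINVAL.
 */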

/* non exhaustive list of sleepable bpf_lsm_*() functions */
BTF_SET_START(btf_sleepable_lsm_hooks)
#ifdef CONFIG_BPF_LSM
BTF_ID(func, bpf_lsm_bprm_committed_creds)
#else
BTF_ID_UNUSED
#endif
BTF_SET_END(btf_sleepable_lsm_hooks)

static int check_sleepable_lsm_hook(u32 btf_id)
{
	return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
}

/* list of non-sleepable functions that are otherwise on
 * ALLOW_ERROR_INJECTION list
 */
BTF_SET_START(btf_non_sleepable_error_inject)
/* Three functions below can be called from sleepable and non-sleepable context.
 * Assume non-sleepable from bpf safety point of view.
 */
BTF_ID(func, __add_to_page_cache_locked)
BTF_ID(func, should_fail_alloc_page)
BTF_ID(func, should_failslab)
BTF_SET_END(btf_non_sleepable_error_inject)

static int check_non_sleepable_error_inject(u32 btf_id)
{
	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
}

int bpf_check_attach_target(struct bpf_verifier_log *log,
			    const struct bpf_prog *prog,
			    const struct bpf_prog *tgt_prog,
			    u32 btf_id,
			    struct bpf_attach_target_info *tgt_info)
{
	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
	const char prefix[] = "btf_trace_";
	int ret = 0, subprog = -1, i;
	const struct btf_type *t;
	bool conservative = true;
	const char *tname;
	struct btf *btf;
	long addr = 0;

	if (!btf_id) {
		bpf_log(log, "Tracing programs must provide btf_id\n");
		return -EINVAL;
	}
	btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
	if (!btf) {
		bpf_log(log,
			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
		return -EINVAL;
	}
	t = btf_type_by_id(btf, btf_id);
	if (!t) {
		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
		return -EINVAL;
	}
	tname = btf_name_by_offset(btf, t->name_off);
	if (!tname) {
		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
		return -EINVAL;
	}
	if (tgt_prog) {
		struct bpf_prog_aux *aux = tgt_prog->aux;

		for (i = 0; i < aux->func_info_cnt; i++)
			if (aux->func_info[i].type_id == btf_id) {
				subprog = i;
				break;
			}
		if (subprog == -1) {
			bpf_log(log, "Subprog %s doesn't exist\n", tname);
			return -EINVAL;
		}
		conservative = aux->func_info_aux[subprog].unreliable;
		if (prog_extension) {
			if (conservative) {
				bpf_log(log,
					"Cannot replace static functions\n");
				return -EINVAL;
			}
			if (!prog->jit_requested) {
				bpf_log(log,
					"Extension programs should be JITed\n");
				return -EINVAL;
			}
		}
		if (!tgt_prog->jited) {
			bpf_log(log, "Can attach to only JITed progs\n");
			return -EINVAL;
		}
		if (tgt_prog->type == prog->type) {
			/* Cannot fentry/fexit another fentry/fexit program.
			 * Cannot attach program extension to another extension.
			 * It's ok to attach fentry/fexit to extension program.
			 */
			bpf_log(log, "Cannot recursively attach\n");
			return -EINVAL;
		}
		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
		    prog_extension &&
		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
			/* Program extensions can extend all program types
			 * except fentry/fexit. The reason is the following.
			 * The fentry/fexit programs are used for performance
			 * analysis, stats and can be attached to any program
			 * type except themselves. When extension program is
			 * replacing XDP function it is necessary to allow
			 * performance analysis of all functions. Both original
			 * XDP program and its program extension. Hence
			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
			 * allowed. If extending of fentry/fexit was allowed it
			 * would be possible to create long call chain
			 * fentry->extension->fentry->extension beyond
			 * reasonable stack size. Hence extending fentry is not
			 * allowed.
			 */
			bpf_log(log, "Cannot extend fentry/fexit\n");
			return -EINVAL;
		}
	} else {
		if (prog_extension) {
			bpf_log(log, "Cannot replace kernel functions\n");
			return -EINVAL;
		}
	}

	switch (prog->expected_attach_type) {
	case BPF_TRACE_RAW_TP:
		if (tgt_prog) {
			bpf_log(log,
				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
			return -EINVAL;
		}
		if (!btf_type_is_typedef(t)) {
			bpf_log(log, "attach_btf_id %u is not a typedef\n",
				btf_id);
			return -EINVAL;
		}
		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
				btf_id, tname);
			return -EINVAL;
		}
		tname += sizeof(prefix) - 1;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_ptr(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;

		break;
	case BPF_TRACE_ITER:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;
		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret)
			return ret;
		break;
	default:
		if (!prog_extension)
			return -EINVAL;
		fallthrough;
	case BPF_MODIFY_RETURN:
	case BPF_LSM_MAC:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		if (prog_extension &&
		    btf_check_type_match(log, prog, btf, t))
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;

		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
			return -EINVAL;

		if (tgt_prog && conservative)
			t = NULL;

		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret < 0)
			return ret;

		if (tgt_prog) {
			if (subprog == 0)
				addr = (long) tgt_prog->bpf_func;
			else
				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
		} else {
			addr = kallsyms_lookup_name(tname);
			if (!addr) {
				bpf_log(log,
					"The address of function %s cannot be found\n",
					tname);
				return -ENOENT;
			}
		}

		if (prog->aux->sleepable) {
			ret = -EINVAL;
			switch (prog->type) {
			case BPF_PROG_TYPE_TRACING:
				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
				 */
				if (!check_non_sleepable_error_inject(btf_id) &&
				    within_error_injection_list(addr))
					ret = 0;
				break;
			case BPF_PROG_TYPE_LSM:
				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
				 * Only some of them are sleepable.
				 */
				if (check_sleepable_lsm_hook(btf_id))
					ret = 0;
				break;
			default:
				break;
			}
			if (ret) {
				bpf_log(log, "%s is not sleepable\n", tname);
				return ret;
			}
		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
			if (tgt_prog) {
				bpf_log(log, "can't modify return codes of BPF programs\n");
				return -EINVAL;
			}
			ret = check_attach_modify_return(addr, tname);
			if (ret) {
				bpf_log(log, "%s() is not modifiable\n", tname);
				return ret;
			}
		}

		break;
	}
	tgt_info->tgt_addr = addr;
	tgt_info->tgt_name = tname;
	tgt_info->tgt_type = t;
	return 0;
}
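
/* Summary of the contract above: on success, tgt_info carries the resolved
 * attach address in tgt_addr (left 0 for raw tracepoints and iterators,
 * which do not need one), the symbol name in tgt_name, and in tgt_type the
 * BTF type used for argument checking (NULL when a conservative subprog
 * target makes its BTF unreliable). check_attach_btf_id() below consumes
 * this to set up the trampoline.
 */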

static int check_attach_btf_id(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
	struct bpf_attach_target_info tgt_info = {};
	u32 btf_id = prog->aux->attach_btf_id;
	struct bpf_trampoline *tr;
	int ret;
	u64 key;

	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
	    prog->type != BPF_PROG_TYPE_LSM) {
		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
		return -EINVAL;
	}

	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
		return check_struct_ops_btf_id(env);

	if (prog->type != BPF_PROG_TYPE_TRACING &&
	    prog->type != BPF_PROG_TYPE_LSM &&
	    prog->type != BPF_PROG_TYPE_EXT)
		return 0;

	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
	if (ret)
		return ret;

	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
		/* to make freplace equivalent to their targets, they need to
		 * inherit env->ops and expected_attach_type for the rest of the
		 * verification
		 */
		env->ops = bpf_verifier_ops[tgt_prog->type];
		prog->expected_attach_type = tgt_prog->expected_attach_type;
	}

	/* store info about the attachment target that will be used later */
	prog->aux->attach_func_proto = tgt_info.tgt_type;
	prog->aux->attach_func_name = tgt_info.tgt_name;

	if (tgt_prog) {
		prog->aux->saved_dst_prog_type = tgt_prog->type;
		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
	}

	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
		prog->aux->attach_btf_trace = true;
		return 0;
	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
		if (!bpf_iter_prog_supported(prog))
			return -EINVAL;
		return 0;
	}

	if (prog->type == BPF_PROG_TYPE_LSM) {
		ret = bpf_lsm_verify_prog(&env->log, prog);
		if (ret < 0)
			return ret;
	}

	key = bpf_trampoline_compute_key(tgt_prog, btf_id);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	prog->aux->dst_trampoline = tr;
	return 0;
}

struct btf *bpf_get_btf_vmlinux(void)
{
	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
		mutex_lock(&bpf_verifier_lock);
		if (!btf_vmlinux)
			btf_vmlinux = btf_parse_vmlinux();
		mutex_unlock(&bpf_verifier_lock);
	}
	return btf_vmlinux;
}
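
/* Note on the locking above: vmlinux BTF is parsed lazily on first use, and
 * the second !btf_vmlinux check under bpf_verifier_lock keeps concurrent
 * callers from parsing it twice. Callers must still handle a NULL or
 * IS_ERR() result, as bpf_check() below does.
 */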

int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
	      union bpf_attr __user *uattr)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	struct bpf_verifier_log *log;
	int i, len, ret = -EINVAL;
	bool is_priv;

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
	if (!env)
		return -ENOMEM;
	log = &env->log;

	len = (*prog)->len;
	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	for (i = 0; i < len; i++)
		env->insn_aux_data[i].orig_idx = i;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];
	is_priv = bpf_capable();

	bpf_get_btf_vmlinux();

	/* grab the mutex to protect few globals used by verifier */
	if (!is_priv)
		mutex_lock(&bpf_verifier_lock);

	if (attr->log_level || attr->log_buf || attr->log_size) {
		/* user requested verbose verifier output
		 * and supplied buffer to store the verification trace
		 */
		log->level = attr->log_level;
		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
		log->len_total = attr->log_size;

		ret = -EINVAL;
		/* log attributes have to be sane */
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
			goto err_unlock;
	}

	if (IS_ERR(btf_vmlinux)) {
		/* Either gcc or pahole or kernel are broken. */
		verbose(env, "in-kernel BTF is malformed\n");
		ret = PTR_ERR(btf_vmlinux);
		goto skip_full_check;
	}

	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
	env->bypass_spec_v1 = bpf_bypass_spec_v1();
	env->bypass_spec_v4 = bpf_bypass_spec_v4();
	env->bpf_capable = bpf_capable();

	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;

	if (bpf_prog_is_dev_bound(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	env->explored_states = kvcalloc(state_htab_size(env),
				       sizeof(struct bpf_verifier_state_list *),
				       GFP_USER);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = resolve_pseudo_ldimm64(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = do_check_subprogs(env);
	ret = ret ?: do_check_main(env);

	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	kvfree(env->explored_states);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	/* instruction rewrites happen after this point */
	if (is_priv) {
		if (ret == 0)
			opt_hard_wire_dead_code_branches(env);
		if (ret == 0)
			ret = opt_remove_dead_code(env);
		if (ret == 0)
			ret = opt_remove_nops(env);
	} else {
		if (ret == 0)
			sanitize_dead_code(env);
	}

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = convert_ctx_accesses(env);

	if (ret == 0)
		ret = fixup_bpf_calls(env);

	/* do 32-bit optimization after insn patching has done so those patched
	 * insns could be handled correctly.
	 */
	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
								     : false;
	}

	if (ret == 0)
		ret = fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);

	if (log->level && bpf_verifier_log_full(log))
		ret = -ENOSPC;
	if (log->level && !log->ubuf) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret == 0 && env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
							  sizeof(env->used_maps[0]),
							  GFP_KERNEL);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;

		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	if (ret == 0)
		adjust_btf_func(env);

err_release_maps:
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);

	/* extension progs temporarily inherit the attach_type of their targets
	   for verification purposes, so set it back to zero before returning
	 */
	if (env->prog->type == BPF_PROG_TYPE_EXT)
		env->prog->expected_attach_type = 0;

	*prog = env->prog;
err_unlock:
	if (!is_priv)
		mutex_unlock(&bpf_verifier_lock);
	vfree(env->insn_aux_data);
err_free_env:
	kfree(env);
	return ret;
}