// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
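	/*
	 * Pre-fill the JIT image with trap instructions so that any stray
	 * branch into not-yet-written space traps instead of executing
	 * stale data.
	 */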
	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]);
}

/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...       	] 		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
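/*
 * r1-relative offset of the 8-byte local_tmp_var scratch slot: just above
 * the BPF stack area when we have our own frame, otherwise in the red zone
 * as shown above.
 */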
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
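	/* tail_call_cnt sits in the 8-byte slot just above local_tmp_var */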
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
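	/*
	 * Non-volatile GPRs are saved just below the previous stack pointer:
	 * at the top of our frame when we have one, in the red zone otherwise.
	 */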
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers\n");
	BUG();
}

static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0));
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		EMIT(PPC_RAW_NOP());
		EMIT(PPC_RAW_NOP());
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8
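/*
 * i.e. the two tail_call_cnt setup instructions above, which a tail call
 * branches past (see bpf_jit_emit_tail_call()).
 */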

	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, b2p[i]))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size));
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, b2p[i]))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size));
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			EMIT(PPC_RAW_MTLR(0));
		}
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0]));

	EMIT(PPC_RAW_BLR());
}

static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourself.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	EMIT(PPC_RAW_MTLR(12));
#endif
	EMIT(PPC_RAW_BLRL());
}

static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but this may optimize the number of
	 * instructions required based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		EMIT(PPC_RAW_NOP());

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourself.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	EMIT(PPC_RAW_MTLR(12));
	EMIT(PPC_RAW_BLRL());
}

static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already setup parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
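	/* index is a u32, so clear its upper 32 bits before the compare */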
	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
	EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1]));
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT));
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1));
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8));
	EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array));
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0));
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
#else
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE));
#endif
	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	EMIT(PPC_RAW_BCTR());
	/* out: */
}

/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
			else
				EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					EMIT(PPC_RAW_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				else
					EMIT(PPC_RAW_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
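				/* dst %= src computed as dst - (dst / src) * src */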
				EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2],
							dst_reg,
							b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2],
							dst_reg,
							b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
			/* skip zero extension move, but set address map. */
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			if (imm == 1) {
				/* special mov32 for zext */
				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
				break;
			}
			EMIT(PPC_RAW_MR(dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			else if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
		break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23));
				/* Rotate 8 bits right & insert LSB to reg */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31));
				/* Move result back to dst_reg */
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31));
				/* Rotate 24 bits and insert byte 1 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7));
				/* Rotate 24 bits and insert byte 3 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23));
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
				EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
				if (insn_is_zext(&insn[i + 1]))
					addrs[++i] = ctx->idx * 4;
				break;
			case 32:
				if (!fp->aux->verifier_zext)
					/* zero-extend 32 bits into 64 bits */
					EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX ATOMIC (atomic ops)
		 */
		case BPF_STX | BPF_ATOMIC | BPF_W:
			if (insn->imm != BPF_ADD) {
				pr_err_ratelimited(
					"eBPF filter atomic op code %02x (@%d) unsupported\n",
					code, i);
				return -ENOTSUPP;
			}

			/* *(u32 *)(dst + off) += src */

			/* Get EA into TMP_REG_1 */
			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
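			/* Remember where the lwarx/stwcx. retry loop starts */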
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			/* add value from src_reg into this */
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			/* store result back */
			EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		case BPF_STX | BPF_ATOMIC | BPF_DW:
			if (insn->imm != BPF_ADD) {
				pr_err_ratelimited(
					"eBPF filter atomic op code %02x (@%d) unsupported\n",
					code, i);
				return -ENOTSUPP;
			}
			/* *(u64 *)(dst + off) += src */

			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
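			/* Same larx/stcx. retry loop as above, but on 64 bits */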
			tmp_idx = ctx->idx * 4;
			EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3));
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg,
						    src_reg));
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg));
					EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
						       31));
				}
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLW(dst_reg,
							  b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPLD(dst_reg,
							  b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						EMIT(PPC_RAW_CMPW(dst_reg,
							 b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPD(dst_reg,
							 b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm));
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg,
							    tmp_reg));
					} else {
						EMIT(PPC_RAW_AND(tmp_reg, dst_reg,
							tmp_reg));
						EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg,
							       0, 0, 31));
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}

/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
				       struct codegen_context *ctx, u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	bool func_addr_fixed;
	u64 func_addr;
	u32 tmp_idx;
	int i, ret;

	for (i = 0; i < fp->len; i++) {
		/*
		 * During the extra pass, only the branch target addresses for
		 * the subprog calls need to be fixed. All other instructions
		 * can be left untouched.
		 *
		 * The JITed image length does not change because we already
		 * ensure that the JITed instruction sequence for these calls
		 * are of fixed length by padding them with NOPs.
		 */
		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == BPF_PSEUDO_CALL) {
			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
						    &func_addr,
						    &func_addr_fixed);
			if (ret < 0)
				return ret;

			/*
			 * Save ctx->idx as this would currently point to the
			 * end of the JITed image and set it to the offset of
			 * the instruction sequence corresponding to the
			 * subprog call temporarily.
			 */
			tmp_idx = ctx->idx;
			ctx->idx = addrs[i] / 4;
			bpf_jit_emit_func_call_rel(image, ctx, func_addr);

			/*
			 * Restore ctx->idx here. This is safe as the length
			 * of the JITed sequence remains unchanged.
			 */
			ctx->idx = tmp_idx;
		}
	}

	return 0;
}

struct powerpc64_jit_data {
	struct bpf_binary_header *header;
	u32 *addrs;
	u8 *image;
	u32 proglen;
	struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
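	/* Have the verifier emit explicit zero-extension (mov32) instructions */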
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * If we have seen a tail call, we need a second pass.
	 * This is because bpf_jit_emit_common_epilogue() is called
	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
	 */
	if (cgctx.seen & SEEN_TAILCALL) {
		cgctx.idx = 0;
		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
			fp = org_fp;
			goto out_addrs;
		}
	}

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
			bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
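	/* The binary header sits at the start of the JIT-allocated pages */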
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}