diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b162ad70effcfeacbc7a304d569b222717f495d7..7658612d915caaf11c1c5df8743994e0c87a3412 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -152,8 +152,6 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; - const u8 ra = bpf2a64[BPF_REG_A]; - const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; @@ -200,10 +198,6 @@ static void build_prologue(struct jit_ctx *ctx) /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); - - /* Clear registers A and X */ - emit_a64_mov_i64(ra, 0, ctx); - emit_a64_mov_i64(rx, 0, ctx); } static void build_epilogue(struct jit_ctx *ctx) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e53670b1d813f3ddfb328b76c8e06c78..3c0bfc1f26941dde063cff501d4f1b24b8e72f5f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. * See stack frame layout desription in "bpf_jit.h"! 
*/ -static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) +static void bpf_jit_prologue(struct bpf_jit *jit) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_SKBP); - /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ - if (is_classic) { - if (REG_SEEN(BPF_REG_A)) - /* lghi %ba,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_A, 0); - if (REG_SEEN(BPF_REG_X)) - /* lghi %bx,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_X, 0); - } } /* @@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); + bpf_jit_prologue(jit); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 75991979f667f1b9e0e320fa47852969d1066983..4286f3618bd07c32bba605874c4f9915be47820e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -193,7 +193,7 @@ struct jit_context { 32 /* space for rbx, r13, r14, r15 */ + \ 8 /* space for skb_copy_bits() buffer */) -#define PROLOGUE_SIZE 51 +#define PROLOGUE_SIZE 48 /* emit x64 prologue code for BPF program and check it's size. * bpf_tail_call helper will skip it while jumping into another program @@ -229,11 +229,15 @@ static void emit_prologue(u8 **pprog) /* mov qword ptr [rbp-X],r15 */ EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24); - /* clear A and X registers */ - EMIT2(0x31, 0xc0); /* xor eax, eax */ - EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls + * we need to reset the counter to 0. 
It's done in two instructions, + * resetting rax register to 0 (xor on eax gets zero-extended), and + * moving it to the counter location. + */ - /* clear tail_cnt: mov qword ptr [rbp-X], rax */ + /* xor eax, eax */ + EMIT2(0x31, 0xc0); + /* mov qword ptr [rbp-X], rax */ EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32); BUILD_BUG_ON(cnt != PROLOGUE_SIZE); @@ -455,6 +459,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } case BPF_ALU | BPF_MOV | BPF_K: + /* optimization: if imm32 is zero, use 'xor <dst>,<dst>' + * to save 3 bytes. + */ + if (imm32 == 0) { + if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + b2 = 0x31; /* xor */ + b3 = 0xC0; + EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); + break; + } + /* mov %eax, imm32 */ if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); @@ -469,6 +485,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, return -EINVAL; } + /* optimization: if imm64 is zero, use 'xor <dst>,<dst>' + * to save 7 bytes. + */ + if (insn[0].imm == 0 && insn[1].imm == 0) { + b1 = add_2mod(0x48, dst_reg, dst_reg); + b2 = 0x31; /* xor */ + b3 = 0xC0; + EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg)); + + insn++; + i++; + break; + } + /* movabsq %rax, imm64 */ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); EMIT(insn[0].imm, 4); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9ea2d22fa2cb5af6c14ac09bb7e553eb1cbed87f..8bed7f1176b88746c4ea2c741bea3d935bc961db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -269,6 +269,7 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_perf_event_output, + BPF_FUNC_skb_load_bytes, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 334b1bdd572c1e66007bd7163c9b98ee9c37deb5..972d9a8e4ac4d31b03ba5d3f86db76d86ecc5707 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -306,10 +306,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) FP = (u64) (unsigned long) 
&stack[ARRAY_SIZE(stack)]; ARG1 = (u64) (unsigned long) ctx; - /* Registers used in classic BPF programs need to be reset first. */ - regs[BPF_REG_A] = 0; - regs[BPF_REG_X] = 0; - select_insn: goto *jumptable[insn->code]; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 10cd1860e5b04aa339853ff893855941aba41600..27a7a26b1ece2e145296b144096191ffa6af504e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1685,6 +1685,126 @@ static struct bpf_test tests[] = { { }, { { 0, 0x35d97ef2 } } }, + { /* Mainly checking JIT here. */ + "MOV REG64", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffffffffffffLL), + BPF_MOV64_REG(R1, R0), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_MOV64_REG(R4, R3), + BPF_MOV64_REG(R5, R4), + BPF_MOV64_REG(R6, R5), + BPF_MOV64_REG(R7, R6), + BPF_MOV64_REG(R8, R7), + BPF_MOV64_REG(R9, R8), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_ALU64_IMM(BPF_MOV, R1, 0), + BPF_ALU64_IMM(BPF_MOV, R2, 0), + BPF_ALU64_IMM(BPF_MOV, R3, 0), + BPF_ALU64_IMM(BPF_MOV, R4, 0), + BPF_ALU64_IMM(BPF_MOV, R5, 0), + BPF_ALU64_IMM(BPF_MOV, R6, 0), + BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_ALU64_IMM(BPF_MOV, R8, 0), + BPF_ALU64_IMM(BPF_MOV, R9, 0), + BPF_ALU64_REG(BPF_ADD, R0, R0), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_ALU64_REG(BPF_ADD, R0, R2), + BPF_ALU64_REG(BPF_ADD, R0, R3), + BPF_ALU64_REG(BPF_ADD, R0, R4), + BPF_ALU64_REG(BPF_ADD, R0, R5), + BPF_ALU64_REG(BPF_ADD, R0, R6), + BPF_ALU64_REG(BPF_ADD, R0, R7), + BPF_ALU64_REG(BPF_ADD, R0, R8), + BPF_ALU64_REG(BPF_ADD, R0, R9), + BPF_ALU64_IMM(BPF_ADD, R0, 0xfefe), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfefe } } + }, + { /* Mainly checking JIT here. 
*/ + "MOV REG32", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffffffffffffLL), + BPF_MOV64_REG(R1, R0), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_MOV64_REG(R4, R3), + BPF_MOV64_REG(R5, R4), + BPF_MOV64_REG(R6, R5), + BPF_MOV64_REG(R7, R6), + BPF_MOV64_REG(R8, R7), + BPF_MOV64_REG(R9, R8), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU32_IMM(BPF_MOV, R2, 0), + BPF_ALU32_IMM(BPF_MOV, R3, 0), + BPF_ALU32_IMM(BPF_MOV, R4, 0), + BPF_ALU32_IMM(BPF_MOV, R5, 0), + BPF_ALU32_IMM(BPF_MOV, R6, 0), + BPF_ALU32_IMM(BPF_MOV, R7, 0), + BPF_ALU32_IMM(BPF_MOV, R8, 0), + BPF_ALU32_IMM(BPF_MOV, R9, 0), + BPF_ALU64_REG(BPF_ADD, R0, R0), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_ALU64_REG(BPF_ADD, R0, R2), + BPF_ALU64_REG(BPF_ADD, R0, R3), + BPF_ALU64_REG(BPF_ADD, R0, R4), + BPF_ALU64_REG(BPF_ADD, R0, R5), + BPF_ALU64_REG(BPF_ADD, R0, R6), + BPF_ALU64_REG(BPF_ADD, R0, R7), + BPF_ALU64_REG(BPF_ADD, R0, R8), + BPF_ALU64_REG(BPF_ADD, R0, R9), + BPF_ALU64_IMM(BPF_ADD, R0, 0xfefe), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfefe } } + }, + { /* Mainly checking JIT here. 
*/ + "LD IMM64", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffffffffffffLL), + BPF_MOV64_REG(R1, R0), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_MOV64_REG(R4, R3), + BPF_MOV64_REG(R5, R4), + BPF_MOV64_REG(R6, R5), + BPF_MOV64_REG(R7, R6), + BPF_MOV64_REG(R8, R7), + BPF_MOV64_REG(R9, R8), + BPF_LD_IMM64(R0, 0x0LL), + BPF_LD_IMM64(R1, 0x0LL), + BPF_LD_IMM64(R2, 0x0LL), + BPF_LD_IMM64(R3, 0x0LL), + BPF_LD_IMM64(R4, 0x0LL), + BPF_LD_IMM64(R5, 0x0LL), + BPF_LD_IMM64(R6, 0x0LL), + BPF_LD_IMM64(R7, 0x0LL), + BPF_LD_IMM64(R8, 0x0LL), + BPF_LD_IMM64(R9, 0x0LL), + BPF_ALU64_REG(BPF_ADD, R0, R0), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_ALU64_REG(BPF_ADD, R0, R2), + BPF_ALU64_REG(BPF_ADD, R0, R3), + BPF_ALU64_REG(BPF_ADD, R0, R4), + BPF_ALU64_REG(BPF_ADD, R0, R5), + BPF_ALU64_REG(BPF_ADD, R0, R6), + BPF_ALU64_REG(BPF_ADD, R0, R7), + BPF_ALU64_REG(BPF_ADD, R0, R8), + BPF_ALU64_REG(BPF_ADD, R0, R9), + BPF_ALU64_IMM(BPF_ADD, R0, 0xfefe), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfefe } } + }, { "INT: ALU MIX", .u.insns_int = { diff --git a/net/core/filter.c b/net/core/filter.c index 672eefbfbe99fff2ade1bd2a095fb2366a2d2c0b..c770196ae8d513472d3b9691d0af3caab3fa1e10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,12 +348,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * jump offsets, 2nd pass remapping: * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); - * - * User BPF's register A is mapped to our BPF register 6, user BPF - * register X is mapped to BPF register 7; frame pointer is always - * register 10; Context 'void *ctx' is stored in register 1, that is, - * for socket filters: ctx == 'struct sk_buff *', for seccomp: - * ctx == 'struct seccomp_data *'. 
*/ static int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len) @@ -381,9 +375,22 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, new_insn = new_prog; fp = prog; - if (new_insn) - *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); - new_insn++; + /* Classic BPF related prologue emission. */ + if (new_insn) { + /* Classic BPF expects A and X to be reset first. These need + * to be guaranteed to be the first two instructions. + */ + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); + + /* All programs must keep CTX in callee-saved BPF_REG_CTX. + * In the eBPF case it's done by the compiler; here we need to + * do this ourselves. Initial CTX is present in BPF_REG_ARG1. + */ + *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); + } else { + new_insn += 3; + } for (i = 0; i < len; fp++, i++) { struct bpf_insn tmp_insns[6] = { }; @@ -1245,6 +1252,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) } #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) +#define BPF_LDST_LEN 16U static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { @@ -1252,7 +1260,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; - char buf[16]; + char buf[BPF_LDST_LEN]; void *ptr; /* bpf verifier guarantees that: @@ -1299,6 +1307,36 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; +static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; + int offset = (int) r2; + void *to = (void *)(unsigned long) r3; + unsigned int len = (unsigned int) r4; + void *ptr; + + if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, to); + if 
(unlikely(!ptr)) + return -EFAULT; + if (ptr != to) + memcpy(to, ptr, len); + + return 0; +} + +const struct bpf_func_proto bpf_skb_load_bytes_proto = { + .func = bpf_skb_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) @@ -1654,6 +1692,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: