diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 661936f90fe08fafda358b2809cf414135c1b6e6..ce308b9a317cfdf23fa9c25b6a2dd4b6b66cc83a 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -174,131 +174,36 @@ int sys_enter(struct syscall_enter_args *args) probe_read(&augmented_args->args, sizeof(augmented_args->args), args); - syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr); - if (syscall == NULL || !syscall->enabled) - return 0; /* - * Yonghong and Edward Cree sayz: - * - * https://www.spinics.net/lists/netdev/msg531645.html - * - * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 - * >> 10: (bf) r1 = r6 - * >> 11: (07) r1 += 16 - * >> 12: (05) goto pc+2 - * >> 15: (79) r3 = *(u64 *)(r1 +0) - * >> dereference of modified ctx ptr R1 off=16 disallowed - * > Aha, we at least got a different error message this time. - * > And indeed llvm has done that optimisation, rather than the more obvious - * > 11: r3 = *(u64 *)(r1 +16) - * > because it wants to have lots of reads share a single insn. You may be able - * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone - * > with llvm knowledge can figure out how to stop it (ideally, llvm would know - * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯ - * - * The optimization mostly likes below: - * - * br1: - * ... - * r1 += 16 - * goto merge - * br2: - * ... - * r1 += 20 - * goto merge - * merge: - * *(u64 *)(r1 + 0) - * - * The compiler tries to merge common loads. There is no easy way to - * stop this compiler optimization without turning off a lot of other - * optimizations. The easiest way is to add barriers: - * - * __asm__ __volatile__("": : :"memory") - * - * after the ctx memory access to prevent their down stream merging. + * Jump to syscall specific augmenter, even if the default one, + * "!raw_syscalls:unaugmented" that will just return 1 to return the + * unagmented tracepoint payload. */ - /* - * For now copy just the first string arg, we need to improve the protocol - * and have more than one. - * - * Using the unrolled loop is not working, only when we do it manually, - * check this out later... - - u8 arg; -#pragma clang loop unroll(full) - for (arg = 0; arg < 6; ++arg) { - if (syscall->string_args_len[arg] != 0) { - filename_len = syscall->string_args_len[arg]; - filename_arg = (const void *)args->args[arg]; - __asm__ __volatile__("": : :"memory"); - break; - } - } - - verifier log: - -; if (syscall->string_args_len[arg] != 0) { -37: (69) r3 = *(u16 *)(r0 +2) - R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm -; if (syscall->string_args_len[arg] != 0) { -38: (55) if r3 != 0x0 goto pc+5 - R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm -39: (b7) r1 = 1 -; if (syscall->string_args_len[arg] != 0) { -40: (bf) r2 = r0 -41: (07) r2 += 4 -42: (69) r3 = *(u16 *)(r0 +4) - R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm -; if (syscall->string_args_len[arg] != 0) { -43: (15) if r3 == 0x0 goto pc+32 - R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm -; filename_arg = (const void *)args->args[arg]; -44: (67) r1 <<= 3 -45: (bf) r3 = r6 -46: (0f) r3 += r1 -47: (b7) r5 = 64 -48: (79) r3 = *(u64 *)(r3 +16) -dereference of modified ctx ptr R3 off=8 disallowed -processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7 - */ - -#define __loop_iter(arg) \ - if (syscall->string_args_len[arg] != 0) { \ - unsigned int filename_len = syscall->string_args_len[arg]; \ - const void *filename_arg = (const void *)args->args[arg]; \ - if (filename_len <= sizeof(augmented_args->filename.value)) \ - len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); -#define loop_iter_first() __loop_iter(0); } -#define loop_iter(arg) else __loop_iter(arg); } -#define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); } - - loop_iter_first() - loop_iter(1) - loop_iter(2) - loop_iter(3) - loop_iter(4) - loop_iter_last(5) + bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len); + // If not found on the PROG_ARRAY syscalls map, then we're filtering it: + return 0; } SEC("raw_syscalls:sys_exit") int sys_exit(struct syscall_exit_args *args) { struct syscall_exit_args exit_args; - struct syscall *syscall; if (pid_filter__has(&pids_filtered, getpid())) return 0; probe_read(&exit_args, sizeof(exit_args), args); - - syscall = bpf_map_lookup_elem(&syscalls, &exit_args.syscall_nr); - if (syscall == NULL || !syscall->enabled) - return 0; - - return 1; + /* + * Jump to syscall specific return augmenter, even if the default one, + * "!raw_syscalls:unaugmented" that will just return 1 to return the + * unagmented tracepoint payload. + */ + bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); + /* + * If not found on the PROG_ARRAY syscalls map, then we're filtering it: + */ + return 0; } license(GPL);