diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 7e33d039ea171ae1a2fe1edfc54b3f92f6020dfd..1fa03508ff5ac6b85106628e20255a5a51ed4fde 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -95,6 +95,7 @@ show up in /proc/sys/kernel: - sysctl_writes_strict - tainted - threads-max +- unprivileged_bpf_disabled - unknown_nmi_panic - watchdog - watchdog_thresh @@ -1072,6 +1073,26 @@ available RAM pages threads-max is reduced accordingly. ============================================================== +unprivileged_bpf_disabled: + +Writing 1 to this entry will disable unprivileged calls to bpf(); +once disabled, calling bpf() without CAP_SYS_ADMIN will return +-EPERM. Once set to 1, this can't be cleared from the running kernel +anymore. + +Writing 2 to this entry will also disable unprivileged calls to bpf(), +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. + +If BPF_UNPRIV_DEFAULT_OFF is enabled in the kernel config, then this +entry will default to 2 instead of 0. + + 0 - Unprivileged calls to bpf() are enabled + 1 - Unprivileged calls to bpf() are disabled without recovery + 2 - Unprivileged calls to bpf() are disabled + +============================================================== + unknown_nmi_panic: The value in this file affects behavior of handling NMI. When the diff --git a/init/Kconfig b/init/Kconfig index 1a0b15c5a82b977ba5af765e4657c1f4521ce869..cef0e5fdc9014f26504a22a0cf33f4b3e1f7c71b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1539,6 +1539,16 @@ config BPF_JIT_ALWAYS_ON Enables BPF JIT and removes BPF interpreter to avoid speculative execution of BPF instructions by the interpreter +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + config USERFAULTFD bool "Enable userfaultfd() system call" select ANON_INODES diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 30253fb4254e42047b6e95bb984c5c9496286c86..b11852ea1822864415859f7f6d7c4372d949d6ce 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -48,7 +48,8 @@ static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); static DEFINE_SPINLOCK(map_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _ops) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5445dead9911a3933a36b72f3982460fd627b056..d2eebbcdff52e7cb6ba6edd115667bdc0135212f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -257,6 +257,28 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, #endif +#ifdef CONFIG_BPF_SYSCALL +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + return ret; +} +#endif + static struct ctl_table kern_table[]; static struct ctl_table vm_table[]; static struct ctl_table fs_table[]; @@ -1247,10 +1269,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &one, + .proc_handler = bpf_unpriv_handler, + .extra1 = &zero, + .extra2 = &two, }, #endif #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)