diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a99b10a6e683cff5a55d4a41b57527..a4414a11eea71329d07e8cae504ce6f9791ed2c6 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, return cgrp->ancestor_ids[ancestor->level] == ancestor->id; } +/** + * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry + * @task: the task to be tested + * @ancestor: possible ancestor of @task's cgroup + * + * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. + * It follows all the same rules as cgroup_is_descendant, and only applies + * to the default hierarchy. + */ +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + struct css_set *cset = task_css_set(task); + + return cgroup_is_descendant(cset->dfl_cgrp, ancestor); +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { @@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; +struct cgroup; static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, @@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + return true; +} #endif /* !CONFIG_CGROUPS */ /* diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da218fec605657ee415f8ad71a95d8851330a9de..bea0c4e2830a48a3596a2d70de234824e59d0e35 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -375,6 +375,17 @@ enum bpf_func_id { */ BPF_FUNC_probe_write_user, + /** + * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_current_task_under_cgroup, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 633a650d7aeb74ff2e6ebc2e4134950cf94bd288..a2ac051c342f87b1372a7aeb33ba816327ee1a43 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void) } late_initcall(register_perf_event_array_map); -#ifdef CONFIG_SOCK_CGROUP_DATA +#ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int fd) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7094c69ac1995892c5cfa31c061cf9f3a59232de..d504722ebfa446561789750441df6c0dbc2ae1b7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_CGROUP_ARRAY: - if (func_id != BPF_FUNC_skb_in_cgroup) + if (func_id != BPF_FUNC_skb_in_cgroup && + func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; default: @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; + case BPF_FUNC_current_task_under_cgroup: case BPF_FUNC_skb_in_cgroup: if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b20438fdb0295a5fdedee44b6c1ae4b8cf229200..6b794d6669a7d0016472d8970020f23842bcbd56 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; +static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *)(long)r1; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + u32 idx = (u32)r2; + + if (unlikely(in_interrupt())) + return -EINVAL; + + if (unlikely(idx >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[idx]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return task_under_cgroup_hierarchy(current, cgrp); +} + +static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { + .func = bpf_current_task_under_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return &bpf_perf_event_read_proto; case BPF_FUNC_probe_write_user: return bpf_get_probe_write_proto(); + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; default: return NULL; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 90ebf7d35c070cf93a076774c988168a459df13a..eb582c6264c393fb8b56ab5ca2664539940a4883 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -24,6 +24,7 @@ hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += test_current_task_under_cgroup test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o libbpf.o xdp1_user.o +test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ + test_current_task_under_cgroup_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += test_current_task_under_cgroup_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_test_current_task_under_cgroup += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index cbc52df165b483bf9f08ff535ca285190c3bc2a4..5e4c41e256b8e54a1c7549cb93551f7a4917cf53 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -45,6 +45,8 @@ static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = (void *) BPF_FUNC_get_stackid; static int (*bpf_probe_write_user)(void *dst, void *src, int size) = (void *) BPF_FUNC_probe_write_user; +static int (*bpf_current_task_under_cgroup)(void *map, int index) = + (void *) BPF_FUNC_current_task_under_cgroup; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c new file mode 100644 index 0000000000000000000000000000000000000000..86b28d7d6c998553a264e0872fd2d6a9d1949de9 --- /dev/null +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -0,0 +1,43 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include +#include "bpf_helpers.h" +#include + +struct bpf_map_def SEC("maps") cgroup_map = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") perf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 1, +}; + +/* Writes the last PID that called sync to a map at index 0 */ +SEC("kprobe/sys_sync") +int bpf_prog1(struct pt_regs *ctx) +{ + u64 pid = bpf_get_current_pid_tgid(); + int idx = 0; + + if (!bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 0; + + bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c new file mode 100644 index 0000000000000000000000000000000000000000..30b0bce884f93f89c56481469d82649d509dc4e9 --- /dev/null +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CGROUP_MOUNT_PATH "/mnt" +#define CGROUP_PATH "/mnt/my-cgroup" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) + +static int join_cgroup(char *path) +{ + int fd, rc = 0; + pid_t pid = getpid(); + char cgroup_path[PATH_MAX + 1]; + + snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path); + + fd = open(cgroup_path, O_WRONLY); + if (fd < 0) { + log_err("Opening Cgroup"); + return 1; + } + + if (dprintf(fd, "%d\n", pid) < 0) { + log_err("Joining Cgroup"); + rc = 1; + } + close(fd); + return rc; +} + +int main(int argc, char **argv) +{ + char filename[256]; + int cg2, idx = 0; + pid_t remote_pid, local_pid = getpid(); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + /* + * This is to avoid interfering with existing cgroups. Unfortunately, + * most people don't have cgroupv2 enabled at this point in time. + * It's easier to create our own mount namespace and manage it + * ourselves. + */ + if (unshare(CLONE_NEWNS)) { + log_err("unshare"); + return 1; + } + + if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { + log_err("mount fakeroot"); + return 1; + } + + if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) { + log_err("mount cgroup2"); + return 1; + } + + if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) { + log_err("mkdir cgroup"); + return 1; + } + + cg2 = open(CGROUP_PATH, O_RDONLY); + if (cg2 < 0) { + log_err("opening target cgroup"); + goto cleanup_cgroup_err; + } + + if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { + log_err("Adding target cgroup to map"); + goto cleanup_cgroup_err; + } + if (join_cgroup("/mnt/my-cgroup")) { + log_err("Leaving target cgroup"); + goto cleanup_cgroup_err; + } + + /* + * The installed helper program catched the sync call, and should + * write it to the map. + */ + + sync(); + bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + + if (local_pid != remote_pid) { + fprintf(stderr, + "BPF Helper didn't write correct PID to map, but: %d\n", + remote_pid); + goto leave_cgroup_err; + } + + /* Verify the negative scenario; leave the cgroup */ + if (join_cgroup(CGROUP_MOUNT_PATH)) + goto leave_cgroup_err; + + remote_pid = 0; + bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); + + sync(); + bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + + if (local_pid == remote_pid) { + fprintf(stderr, "BPF cgroup negative test did not work\n"); + goto cleanup_cgroup_err; + } + + rmdir(CGROUP_PATH); + return 0; + + /* Error condition, cleanup */ +leave_cgroup_err: + join_cgroup(CGROUP_MOUNT_PATH); +cleanup_cgroup_err: + rmdir(CGROUP_PATH); + return 1; +}