From 48423a0b5d5a8591d5c46faabfc2b0071c152e8f Mon Sep 17 00:00:00 2001 From: Pavel Hrdina Date: Mon, 24 Jun 2019 14:15:31 +0200 Subject: [PATCH] vircgroup: introduce virCgroupV2DevicesAttachProg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function loads the BPF prog with prepared map into kernel and attaches it into guest cgroup. It can also be used to replace existing program in the cgroup if we need to resize BPF map to store more rules for devices. The old program will be closed and removed from kernel. There are two possible ways how to create BPF program: - One way is to write simple C-like code which can be compiled into BPF object file which can be loaded into kernel using elfutils. - The second way is to define macros which look like assembler instructions and can be used directly to create BPF program that can be directly loaded into kernel. Since the program is not too complex we can use the second option. If there is no program, all devices are allowed; if there is some program it is executed and based on the exit status the access is denied for 0 and allowed for 1. 
Our program will follow these rules: - first it will try to look for the specific key using major and minor to see if there is any rule for that specific device - if there is no specific rule it will try to look for any rule that matches only major of the device - if there is no match with major it will try the same but with minor of the device - as the last attempt it will try to look for rule for all devices and if there is no match it will return 0 to deny that access Signed-off-by: Pavel Hrdina Reviewed-by: Ján Tomko --- po/POTFILES.in | 1 + src/libvirt_private.syms | 1 + src/util/vircgrouppriv.h | 10 ++ src/util/vircgroupv2devices.c | 274 ++++++++++++++++++++++++++++++++++ src/util/vircgroupv2devices.h | 5 + 5 files changed, 291 insertions(+) diff --git a/po/POTFILES.in b/po/POTFILES.in index 984ec36c0f..f93fb9694d 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -230,6 +230,7 @@ @SRCDIR@/src/util/vircgroupbackend.h @SRCDIR@/src/util/vircgroupv1.c @SRCDIR@/src/util/vircgroupv2.c +@SRCDIR@/src/util/vircgroupv2devices.c @SRCDIR@/src/util/virclosecallbacks.c @SRCDIR@/src/util/vircommand.c @SRCDIR@/src/util/virconf.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index c933277918..975733f71e 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1717,6 +1717,7 @@ virCgroupV1Register; virCgroupV2Register; # util/vircgroupv2devices.h +virCgroupV2DevicesAttachProg; virCgroupV2DevicesAvailable; # util/virclosecallbacks.h diff --git a/src/util/vircgrouppriv.h b/src/util/vircgrouppriv.h index 334095719e..f2a80aeb82 100644 --- a/src/util/vircgrouppriv.h +++ b/src/util/vircgrouppriv.h @@ -41,10 +41,20 @@ struct _virCgroupV1Controller { typedef struct _virCgroupV1Controller virCgroupV1Controller; typedef virCgroupV1Controller *virCgroupV1ControllerPtr; +struct _virCgroupV2Devices { + int mapfd; + int progfd; + ssize_t count; + ssize_t max; +}; +typedef struct _virCgroupV2Devices virCgroupV2Devices; +typedef virCgroupV2Devices 
*virCgroupV2DevicesPtr; + struct _virCgroupV2Controller { int controllers; char *mountPoint; char *placement; + virCgroupV2Devices devices; }; typedef struct _virCgroupV2Controller virCgroupV2Controller; typedef virCgroupV2Controller *virCgroupV2ControllerPtr; diff --git a/src/util/vircgroupv2devices.c b/src/util/vircgroupv2devices.c index 8641645810..c30a23f165 100644 --- a/src/util/vircgroupv2devices.c +++ b/src/util/vircgroupv2devices.c @@ -30,6 +30,7 @@ #define LIBVIRT_VIRCGROUPPRIV_H_ALLOW #include "vircgrouppriv.h" +#include "viralloc.h" #include "virbpf.h" #include "vircgroup.h" #include "vircgroupv2devices.h" @@ -60,10 +61,283 @@ virCgroupV2DevicesAvailable(virCgroupPtr group) return true; } + + +/* Steps to get assembly version of devices BPF program: + * + * Save the following program into bpfprog.c, compile it using clang: + * + * clang -O2 -Wall -target bpf -c bpfprog.c -o bpfprog.o + * + * Now you can use llvm-objdump to get the list of instructions: + * + * llvm-objdump -S -no-show-raw-insn bpfprog.o + * + * which can be converted into program using VIR_BPF_* macros. 
+ * + * ---------------------------------------------------------------------------- + * #include <linux/bpf.h> + * #include <linux/version.h> + * + * #define SEC(NAME) __attribute__((section(NAME), used)) + * + * struct bpf_map_def { + * unsigned int type; + * unsigned int key_size; + * unsigned int value_size; + * unsigned int max_entries; + * unsigned int map_flags; + * unsigned int inner_map_idx; + * unsigned int numa_node; + * }; + * + * static void *(*bpf_map_lookup_elem)(void *map, void *key) = + * (void *) BPF_FUNC_map_lookup_elem; + * + * struct bpf_map_def SEC("maps") devices = { + * .type = BPF_MAP_TYPE_HASH, + * .key_size = sizeof(__u64), + * .value_size = sizeof(__u32), + * .max_entries = 65, + * }; + * + * SEC("cgroup/dev") int + * bpf_libvirt_cgroup_device(struct bpf_cgroup_dev_ctx *ctx) + * { + * __u64 key = ((__u64)ctx->major << 32) | ctx->minor; + * __u32 *val = 0; + * + * val = bpf_map_lookup_elem(&devices, &key); + * if (val && (ctx->access_type & *val) == ctx->access_type) + * return 1; + * + * key = ((__u64)ctx->major << 32) | 0xffffffff; + * val = bpf_map_lookup_elem(&devices, &key); + * if (val && (ctx->access_type & *val) == ctx->access_type) + * return 1; + * + * key = 0xffffffff00000000 | ctx->minor; + * val = bpf_map_lookup_elem(&devices, &key); + * if (val && (ctx->access_type & *val) == ctx->access_type) + * return 1; + * + * key = 0xffffffffffffffff; + * val = bpf_map_lookup_elem(&devices, &key); + * if (val && (ctx->access_type & *val) == ctx->access_type) + * return 1; + * + * return 0; + * } + * + * char _license[] SEC("license") = "GPL"; + * __u32 _version SEC("version") = LINUX_VERSION_CODE; + * ---------------------------------------------------------------------------- + * */ +static int +virCgroupV2DevicesLoadProg(int mapfd) +{ + struct bpf_insn prog[] = { + /* 0: r6 = r1 */ + VIR_BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* 1: r1 = *(u32 *)(r6 + 8) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 8), + /* 2: r2 = *(u32 *)(r6 + 4) */ + VIR_BPF_LDX_MEM(BPF_W, 
BPF_REG_2, BPF_REG_6, 4), + /* 3: r2 <<= 32 */ + VIR_BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + /* 4: r2 |= r1 */ + VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + /* 5: *(u64 *)(r10 - 8) = r2 */ + VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), + /* 6: r2 = r10 */ + VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* 7: r2 += -8 */ + VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + /* 8: r1 = 0 ll */ + VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd), + /* 10: call 1 */ + VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem), + /* 11: r1 = r0 */ + VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* 12: if r1 == 0 goto +5 */ + VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* 13: r0 = 1 */ + VIR_BPF_MOV64_IMM(BPF_REG_0, 1), + /* 14: r2 = *(u32 *)(r6 + 0) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0), + /* 15: r1 = *(u32 *)(r1 + 0) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* 16: r1 &= r2 */ + VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + /* 17: if r1 == r2 goto +50 */ + VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 50), + /* LBB0_2: */ + /* 18: r1 = *(u32 *)(r6 + 4) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4), + /* 19: r1 <<= 32 */ + VIR_BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + /* 20: r2 = 4294967295 ll */ + VIR_BPF_LD_IMM64(BPF_REG_2, 0xffffffff), + /* 22: r1 |= r2 */ + VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_2), + /* 23: *(u64 *)(r10 - 8) = r1 */ + VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + /* 24: r2 = r10 */ + VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* 25: r2 += -8 */ + VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + /* 26: r1 = 0 ll */ + VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd), + /* 28: call 1 */ + VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem), + /* 29: r1 = r0 */ + VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* 30: if r1 == 0 goto +5 */ + VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* 31: r0 = 1 */ + VIR_BPF_MOV64_IMM(BPF_REG_0, 1), + /* 32: r2 = *(u32 *)(r6 + 0) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0), + /* 33: r1 = *(u32 *)(r1 + 0) 
*/ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* 34: r1 &= r2 */ + VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + /* 35: if r1 == r2 goto +32 */ + VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 32), + /* LBB0_4: */ + /* 36: r1 = *(u32 *)(r6 + 8) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 8), + /* 37: r2 = -4294967296 ll */ + VIR_BPF_LD_IMM64(BPF_REG_2, 0xffffffff00000000), + /* 39: r1 |= r2 */ + VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_2), + /* 40: *(u64 *)(r10 - 8) = r1 */ + VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + /* 41: r2 = r10 */ + VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* 42: r2 += -8 */ + VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + /* 43: r1 = 0 ll */ + VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd), + /* 45: call 1 */ + VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem), + /* 46: r1 = r0 */ + VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* 47: if r1 == 0 goto +5 */ + VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* 48: r0 = 1 */ + VIR_BPF_MOV64_IMM(BPF_REG_0, 1), + /* 49: r2 = *(u32 *)(r6 + 0) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0), + /* 50: r1 = *(u32 *)(r1 + 0) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* 51: r1 &= r2 */ + VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + /* 52: if r1 == r2 goto +15 */ + VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 15), + /* LBB0_6: */ + /* 53: r1 = -1 */ + VIR_BPF_MOV64_IMM(BPF_REG_1, -1), + /* 54: *(u64 *)(r10 - 8) = r1 */ + VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + /* 55: r2 = r10 */ + VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* 56: r2 += -8 */ + VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + /* 57: r1 = 0 ll */ + VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd), + /* 59: call 1 */ + VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem), + /* 60: r1 = r0 */ + VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* 61: if r1 == 0 goto +5 */ + VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* 62: r0 = 1 */ + VIR_BPF_MOV64_IMM(BPF_REG_0, 1), + /* 63: r2 = *(u32 *)(r6 + 0) */ + 
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0), + /* 64: r1 = *(u32 *)(r1 + 0) */ + VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* 65: r1 &= r2 */ + VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + /* 66: if r1 == r2 goto +1 */ + VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + /* LBB0_8: */ + /* 67: r0 = 0 */ + VIR_BPF_MOV64_IMM(BPF_REG_0, 0), + /* LBB0_9: */ + /* 68: exit */ + VIR_BPF_EXIT_INSN(), + }; + + return virBPFLoadProg(prog, BPF_PROG_TYPE_CGROUP_DEVICE, G_N_ELEMENTS(prog)); +} + + +int +virCgroupV2DevicesAttachProg(virCgroupPtr group, + int mapfd, + size_t max) +{ + int ret = -1; + VIR_AUTOCLOSE progfd = -1; + VIR_AUTOCLOSE cgroupfd = -1; + g_autofree char *path = NULL; + + if (virCgroupPathOfController(group, VIR_CGROUP_CONTROLLER_DEVICES, + NULL, &path) < 0) { + goto cleanup; + } + + progfd = virCgroupV2DevicesLoadProg(mapfd); + if (progfd < 0) { + virReportSystemError(errno, "%s", _("failed to load cgroup BPF prog")); + goto cleanup; + } + + cgroupfd = open(path, O_RDONLY); + if (cgroupfd < 0) { + virReportSystemError(errno, _("unable to open '%s'"), path); + goto cleanup; + } + + if (virBPFAttachProg(progfd, cgroupfd, BPF_CGROUP_DEVICE) < 0) { + virReportSystemError(errno, "%s", _("failed to attach cgroup BPF prog")); + goto cleanup; + } + + if (group->unified.devices.progfd > 0) { + VIR_DEBUG("Closing existing program that was replaced by new one."); + VIR_FORCE_CLOSE(group->unified.devices.progfd); + } + + group->unified.devices.progfd = progfd; + group->unified.devices.mapfd = mapfd; + group->unified.devices.max = max; + progfd = -1; + mapfd = -1; + + ret = 0; + cleanup: + VIR_FORCE_CLOSE(mapfd); + return ret; +} #else /* !HAVE_DECL_BPF_CGROUP_DEVICE */ bool virCgroupV2DevicesAvailable(virCgroupPtr group G_GNUC_UNUSED) { return false; } + + +int +virCgroupV2DevicesAttachProg(virCgroupPtr group G_GNUC_UNUSED, + int mapfd G_GNUC_UNUSED, + size_t max G_GNUC_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("cgroups v2 BPF devices not 
supported " + "with this kernel")); + return -1; +} #endif /* !HAVE_DECL_BPF_CGROUP_DEVICE */ diff --git a/src/util/vircgroupv2devices.h b/src/util/vircgroupv2devices.h index 2448a8890f..57454e80af 100644 --- a/src/util/vircgroupv2devices.h +++ b/src/util/vircgroupv2devices.h @@ -22,3 +22,8 @@ bool virCgroupV2DevicesAvailable(virCgroupPtr group); + +int +virCgroupV2DevicesAttachProg(virCgroupPtr group, + int mapfd, + size_t max); -- GitLab