提交 48423a0b 编写于 作者: P Pavel Hrdina

vircgroup: introduce virCgroupV2DevicesAttachProg

This function loads the BPF prog with prepared map into kernel and
attaches it into guest cgroup.  It can also be used to replace an existing
program in the cgroup if we need to resize BPF map to store more rules
for devices. The old program will be closed and removed from kernel.

There are two possible ways how to create BPF program:

    - One way is to write simple C-like code which can be compiled into
      BPF object file which can be loaded into kernel using elfutils.

    - The second way is to define macros which look like assembler
      instructions and can be used directly to create BPF program that
      can be directly loaded into kernel.

Since the program is not too complex we can use the second option.

If there is no program, all devices are allowed, if there is some
program it is executed and based on the exit status the access is
denied for 0 and allowed for 1.

Our program will follow these rules:

    - first it will try to look for the specific key using major and
      minor to see if there is any rule for that specific device

    - if there is no specific rule it will try to look for any rule that
      matches only major of the device

    - if there is no match with major it will try the same but with
      minor of the device

    - as the last attempt it will try to look for rule for all devices
      and if there is no match it will return 0 to deny that access
Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
上级 30b6ddc4
......@@ -230,6 +230,7 @@
@SRCDIR@/src/util/vircgroupbackend.h
@SRCDIR@/src/util/vircgroupv1.c
@SRCDIR@/src/util/vircgroupv2.c
@SRCDIR@/src/util/vircgroupv2devices.c
@SRCDIR@/src/util/virclosecallbacks.c
@SRCDIR@/src/util/vircommand.c
@SRCDIR@/src/util/virconf.c
......
......@@ -1717,6 +1717,7 @@ virCgroupV1Register;
virCgroupV2Register;
# util/vircgroupv2devices.h
virCgroupV2DevicesAttachProg;
virCgroupV2DevicesAvailable;
# util/virclosecallbacks.h
......
......@@ -41,10 +41,20 @@ struct _virCgroupV1Controller {
typedef struct _virCgroupV1Controller virCgroupV1Controller;
typedef virCgroupV1Controller *virCgroupV1ControllerPtr;
/* Per-group state of the BPF based devices controller for cgroups v2. */
struct _virCgroupV2Devices {
/* fd of the BPF map handed to the currently attached program */
int mapfd;
/* fd of the BPF program currently attached to the cgroup */
int progfd;
/* presumably the number of rules currently stored in the map -- TODO confirm */
ssize_t count;
/* size recorded for the map when the program was attached */
ssize_t max;
};
typedef struct _virCgroupV2Devices virCgroupV2Devices;
typedef virCgroupV2Devices *virCgroupV2DevicesPtr;
/* State kept for a group on the cgroups v2 (unified) hierarchy. */
struct _virCgroupV2Controller {
/* presumably a bitmask of enabled controllers -- TODO confirm */
int controllers;
/* presumably where the unified hierarchy is mounted -- TODO confirm */
char *mountPoint;
/* presumably the group path relative to the mount point -- TODO confirm */
char *placement;
/* BPF devices controller state (map/program fds and map sizing) */
virCgroupV2Devices devices;
};
typedef struct _virCgroupV2Controller virCgroupV2Controller;
typedef virCgroupV2Controller *virCgroupV2ControllerPtr;
......
......@@ -30,6 +30,7 @@
#define LIBVIRT_VIRCGROUPPRIV_H_ALLOW
#include "vircgrouppriv.h"
#include "viralloc.h"
#include "virbpf.h"
#include "vircgroup.h"
#include "vircgroupv2devices.h"
......@@ -60,10 +61,283 @@ virCgroupV2DevicesAvailable(virCgroupPtr group)
return true;
}
/* Steps to get assembly version of devices BPF program:
*
* Save the following program into bpfprog.c, compile it using clang:
*
* clang -O2 -Wall -target bpf -c bpfprog.c -o bpfprog.o
*
* Now you can use llvm-objdump to get the list of instructions:
*
* llvm-objdump -S -no-show-raw-insn bpfprog.o
*
* which can be converted into program using VIR_BPF_* macros.
*
* ----------------------------------------------------------------------------
* #include <linux/bpf.h>
* #include <linux/version.h>
*
* #define SEC(NAME) __attribute__((section(NAME), used))
*
* struct bpf_map_def {
* unsigned int type;
* unsigned int key_size;
* unsigned int value_size;
* unsigned int max_entries;
* unsigned int map_flags;
* unsigned int inner_map_idx;
* unsigned int numa_node;
* };
*
* static void *(*bpf_map_lookup_elem)(void *map, void *key) =
* (void *) BPF_FUNC_map_lookup_elem;
*
* struct bpf_map_def SEC("maps") devices = {
* .type = BPF_MAP_TYPE_HASH,
* .key_size = sizeof(__u64),
* .value_size = sizeof(__u32),
* .max_entries = 65,
* };
*
* SEC("cgroup/dev") int
* bpf_libvirt_cgroup_device(struct bpf_cgroup_dev_ctx *ctx)
* {
* __u64 key = ((__u64)ctx->major << 32) | ctx->minor;
* __u32 *val = 0;
*
* val = bpf_map_lookup_elem(&devices, &key);
* if (val && (ctx->access_type & *val) == ctx->access_type)
* return 1;
*
* key = ((__u64)ctx->major << 32) | 0xffffffff;
* val = bpf_map_lookup_elem(&devices, &key);
* if (val && (ctx->access_type & *val) == ctx->access_type)
* return 1;
*
* key = 0xffffffff00000000 | ctx->minor;
* val = bpf_map_lookup_elem(&devices, &key);
* if (val && (ctx->access_type & *val) == ctx->access_type)
* return 1;
*
* key = 0xffffffffffffffff;
* val = bpf_map_lookup_elem(&devices, &key);
* if (val && (ctx->access_type & *val) == ctx->access_type)
* return 1;
*
* return 0;
* }
*
* char _license[] SEC("license") = "GPL";
* __u32 _version SEC("version") = LINUX_VERSION_CODE;
* ----------------------------------------------------------------------------
* */
/*
 * virCgroupV2DevicesLoadProg:
 * @mapfd: file descriptor of the BPF map the program looks rules up in
 *
 * Builds the devices BPF program as an array of instructions (the
 * hand-assembled form of the reference C program shown in the comment
 * above) and passes it to virBPFLoadProg() as a
 * BPF_PROG_TYPE_CGROUP_DEVICE program.  @mapfd is embedded into every
 * VIR_BPF_LD_MAP_FD instruction.
 *
 * The program performs up to four map lookups with progressively less
 * specific keys and exits with 1 as soon as a found value covers all bits
 * of the requested access type; otherwise it exits with 0.
 *
 * The "N:" numbers in the per-instruction comments are instruction slots;
 * as the numbering shows, VIR_BPF_LD_MAP_FD and VIR_BPF_LD_IMM64 each take
 * two slots.  Jump offsets are hard-coded against these slots, so any
 * insertion or removal of instructions requires recomputing them.
 *
 * Returns whatever virBPFLoadProg() returns; the caller in this file
 * treats a negative value as failure and any other value as the program fd.
 */
static int
virCgroupV2DevicesLoadProg(int mapfd)
{
struct bpf_insn prog[] = {
/* Lookup 1: key = (ctx->major << 32) | ctx->minor.
 * r6 keeps the context pointer across the helper calls. */
/* 0: r6 = r1 */
VIR_BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
/* 1: r1 = *(u32 *)(r6 + 8) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 8),
/* 2: r2 = *(u32 *)(r6 + 4) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 4),
/* 3: r2 <<= 32 */
VIR_BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
/* 4: r2 |= r1 */
VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
/* 5: *(u64 *)(r10 - 8) = r2 */
VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
/* 6: r2 = r10 */
VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
/* 7: r2 += -8 */
VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
/* 8: r1 = 0 ll */
VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
/* 10: call 1 */
VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
/* 11: r1 = r0 */
VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
/* 12: if r1 == 0 goto +5 <LBB0_2> */
VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
/* 13: r0 = 1 */
VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
/* 14: r2 = *(u32 *)(r6 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
/* 15: r1 = *(u32 *)(r1 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
/* 16: r1 &= r2 */
VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
/* 17: if r1 == r2 goto +50 <LBB0_9> */
VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 50),
/* Lookup 2: key = (ctx->major << 32) | 0xffffffff. */
/* LBB0_2: */
/* 18: r1 = *(u32 *)(r6 + 4) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4),
/* 19: r1 <<= 32 */
VIR_BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
/* 20: r2 = 4294967295 ll */
VIR_BPF_LD_IMM64(BPF_REG_2, 0xffffffff),
/* 22: r1 |= r2 */
VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_2),
/* 23: *(u64 *)(r10 - 8) = r1 */
VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
/* 24: r2 = r10 */
VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
/* 25: r2 += -8 */
VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
/* 26: r1 = 0 ll */
VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
/* 28: call 1 */
VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
/* 29: r1 = r0 */
VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
/* 30: if r1 == 0 goto +5 <LBB0_4> */
VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
/* 31: r0 = 1 */
VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
/* 32: r2 = *(u32 *)(r6 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
/* 33: r1 = *(u32 *)(r1 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
/* 34: r1 &= r2 */
VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
/* 35: if r1 == r2 goto +32 <LBB0_9> */
VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 32),
/* Lookup 3: key = 0xffffffff00000000 | ctx->minor. */
/* LBB0_4: */
/* 36: r1 = *(u32 *)(r6 + 8) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 8),
/* 37: r2 = -4294967296 ll */
VIR_BPF_LD_IMM64(BPF_REG_2, 0xffffffff00000000),
/* 39: r1 |= r2 */
VIR_BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_2),
/* 40: *(u64 *)(r10 - 8) = r1 */
VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
/* 41: r2 = r10 */
VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
/* 42: r2 += -8 */
VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
/* 43: r1 = 0 ll */
VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
/* 45: call 1 */
VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
/* 46: r1 = r0 */
VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
/* 47: if r1 == 0 goto +5 <LBB0_6> */
VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
/* 48: r0 = 1 */
VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
/* 49: r2 = *(u32 *)(r6 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
/* 50: r1 = *(u32 *)(r1 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
/* 51: r1 &= r2 */
VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
/* 52: if r1 == r2 goto +15 <LBB0_9> */
VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 15),
/* Lookup 4: key = 0xffffffffffffffff (rule for all devices). */
/* LBB0_6: */
/* 53: r1 = -1 */
VIR_BPF_MOV64_IMM(BPF_REG_1, -1),
/* 54: *(u64 *)(r10 - 8) = r1 */
VIR_BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
/* 55: r2 = r10 */
VIR_BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
/* 56: r2 += -8 */
VIR_BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
/* 57: r1 = 0 ll */
VIR_BPF_LD_MAP_FD(BPF_REG_1, mapfd),
/* 59: call 1 */
VIR_BPF_CALL_INSN(BPF_FUNC_map_lookup_elem),
/* 60: r1 = r0 */
VIR_BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
/* 61: if r1 == 0 goto +5 <LBB0_8> */
VIR_BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
/* 62: r0 = 1 */
VIR_BPF_MOV64_IMM(BPF_REG_0, 1),
/* 63: r2 = *(u32 *)(r6 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, 0),
/* 64: r1 = *(u32 *)(r1 + 0) */
VIR_BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
/* 65: r1 &= r2 */
VIR_BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
/* 66: if r1 == r2 goto +1 <LBB0_9> */
VIR_BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
/* No lookup matched the requested access: exit status 0. */
/* LBB0_8: */
/* 67: r0 = 0 */
VIR_BPF_MOV64_IMM(BPF_REG_0, 0),
/* Common exit; r0 holds the exit status set by whichever path got here. */
/* LBB0_9: */
/* 68: exit */
VIR_BPF_EXIT_INSN(),
};
return virBPFLoadProg(prog, BPF_PROG_TYPE_CGROUP_DEVICE, G_N_ELEMENTS(prog));
}
/**
 * virCgroupV2DevicesAttachProg:
 * @group: cgroup whose devices controller path the program is attached to
 * @mapfd: file descriptor of the BPF map the program looks rules up in
 * @max: size recorded in @group for the map behind @mapfd
 *
 * Loads the devices BPF program bound to @mapfd and attaches it to the
 * cgroup directory of @group.  When @group already tracks a program its
 * descriptor is closed after the new program has been attached.  A
 * previously tracked map descriptor that differs from @mapfd is closed as
 * well, so that replacing the map (e.g. to resize it) does not leak the
 * old descriptor.
 *
 * This function always consumes @mapfd: on success it is stored in
 * @group, on failure it is closed.
 *
 * Returns 0 on success, -1 on failure.
 */
int
virCgroupV2DevicesAttachProg(virCgroupPtr group,
                             int mapfd,
                             size_t max)
{
    int ret = -1;
    VIR_AUTOCLOSE progfd = -1;
    VIR_AUTOCLOSE cgroupfd = -1;
    g_autofree char *path = NULL;

    if (virCgroupPathOfController(group, VIR_CGROUP_CONTROLLER_DEVICES,
                                  NULL, &path) < 0) {
        goto cleanup;
    }

    progfd = virCgroupV2DevicesLoadProg(mapfd);
    if (progfd < 0) {
        virReportSystemError(errno, "%s", _("failed to load cgroup BPF prog"));
        goto cleanup;
    }

    cgroupfd = open(path, O_RDONLY);
    if (cgroupfd < 0) {
        virReportSystemError(errno, _("unable to open '%s'"), path);
        goto cleanup;
    }

    if (virBPFAttachProg(progfd, cgroupfd, BPF_CGROUP_DEVICE) < 0) {
        virReportSystemError(errno, "%s", _("failed to attach cgroup BPF prog"));
        goto cleanup;
    }

    /* The new program is attached; drop our handle on the old one. */
    if (group->unified.devices.progfd > 0) {
        VIR_DEBUG("Closing existing program that was replaced by new one.");
        VIR_FORCE_CLOSE(group->unified.devices.progfd);
    }

    /* Drop the old map handle too, unless the caller re-attached the very
     * same map, in which case the descriptor must stay open. */
    if (group->unified.devices.mapfd > 0 &&
        group->unified.devices.mapfd != mapfd) {
        VIR_DEBUG("Closing existing map that was replaced by new one.");
        VIR_FORCE_CLOSE(group->unified.devices.mapfd);
    }

    group->unified.devices.progfd = progfd;
    group->unified.devices.mapfd = mapfd;
    group->unified.devices.max = max;

    /* Ownership moved into @group; keep the auto-close of progfd and the
     * cleanup path below from closing what was just stored. */
    progfd = -1;
    mapfd = -1;

    ret = 0;
 cleanup:
    VIR_FORCE_CLOSE(mapfd);
    return ret;
}
#else /* !HAVE_DECL_BPF_CGROUP_DEVICE */
/*
 * Fallback for builds without BPF_CGROUP_DEVICE support (the
 * !HAVE_DECL_BPF_CGROUP_DEVICE branch): the BPF devices controller is
 * never reported as available, regardless of @group.
 */
bool
virCgroupV2DevicesAvailable(virCgroupPtr group G_GNUC_UNUSED)
{
return false;
}
/*
 * Fallback for builds without BPF_CGROUP_DEVICE support (the
 * !HAVE_DECL_BPF_CGROUP_DEVICE branch): all arguments are ignored, an
 * ENOSYS system error is reported and -1 is returned.  Unlike the real
 * implementation, this stub does not close @mapfd.
 */
int
virCgroupV2DevicesAttachProg(virCgroupPtr group G_GNUC_UNUSED,
int mapfd G_GNUC_UNUSED,
size_t max G_GNUC_UNUSED)
{
virReportSystemError(ENOSYS, "%s",
_("cgroups v2 BPF devices not supported "
"with this kernel"));
return -1;
}
#endif /* !HAVE_DECL_BPF_CGROUP_DEVICE */
......@@ -22,3 +22,8 @@
bool
virCgroupV2DevicesAvailable(virCgroupPtr group);
int
virCgroupV2DevicesAttachProg(virCgroupPtr group,
int mapfd,
size_t max);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册