提交 2c1189e3 编写于 作者: H Hui Tang 提交者: Zheng Zengkai

samples:bpf: Add samples for cfs select core

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA

--------------------------------

1.Samples support hook of 'cfs_select_rq'
2.Samples support hook of 'cfs_wake_affine'
3.Samples support hook of 'cfs_select_exit'
Signed-off-by: Hui Tang <tanghui20@huawei.com>
上级 a36f1c81
......@@ -55,6 +55,7 @@ tprogs-y += xdp_sample_pkts
tprogs-y += ibumad
tprogs-y += hbm
tprogs-y += sched_preempt
tprogs-y += sched_select_core
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
......@@ -113,6 +114,7 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS)
hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS)
sched_preempt-objs := sched_preempt_user.o
sched_select_core-objs := sched_select_core_user.o
# Tell kbuild to always build the programs
always-y := $(tprogs-y)
......@@ -175,6 +177,7 @@ always-y += hbm_out_kern.o
always-y += hbm_edt_kern.o
always-y += xdpsock_kern.o
always-y += sched_preempt_kern.o
always-y += sched_select_core_kern.o
ifeq ($(ARCH), arm)
# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Sample select core BPF program.
* 'cfs_select_rq'
* Replace the original core selection policy or
* implement dynamic CPU affinity.
*
* 'cfs_select_rq_exit'
* Restoring the CPU affinity of the task before exiting of
* 'select_task_rq_fair'.
*
* To be used with 'cfs_select_rq' hook to implement
* dynamic CPU affinity.
*
* 'cfs_wake_affine'
* Determine on which CPU task can run soonest. Allow user to
 *    implement different policies.
*/
#include <linux/version.h>
#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/libbpf_sched.h>
#include <linux/cpumask.h>
#define STR_MAX			(32)	/* max length of a cpulist string */
#define SELECT_RQ_RANGE		(-1)	/* hook rc: kernel keeps selecting within (narrowed) range */
#define SELECT_RQ_EXIT_CPU_VALID	(-2)	/* exit-hook rc: kernel's new_cpu is acceptable */

/* From kernel/sched/sched.h */
#define WF_SYNC			0x01	/* Waker goes to sleep after wakeup */
#define WF_FORK			0x02	/* Child wakeup after fork */
#define WF_MIGRATED		0x04	/* Internal use, task got migrated */
#define WF_ON_CPU		0x08	/* Wakee is on_cpu */

/* Builds enum constants TAG_1, TAG_2, ... */
#define TAG_ID(id)		TAG_##id

enum tag_id {
	TAG_NONE,
	TAG_ID(1),
	TAG_ID(2),
	TAG_MAX
};

/* Maps a task tag to the cpulist string the task should prefer. */
struct tag_info {
	long tag;
	char buf[STR_MAX];
};

/* Indexed directly by tag value: tag 1 -> CPUs 0-3, tag 2 -> CPUs 4-7. */
struct tag_info tag_tbl[] = {
	{TAG_NONE, ""},
	{TAG_ID(1), "0-3"},
	{TAG_ID(2), "4-7"},
	{TAG_MAX, ""},
};

/*
 * Per-CPU scratch slot shared between the 'cfs_select_rq' entry program
 * (which records the idlest candidate CPU) and the 'cfs_select_rq_exit'
 * program (which reads it back as a fallback CPU).
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, int);
	__uint(max_entries, 1);
} map_idlest_cpu SEC(".maps");

/* Utilization low-watermark in percent; mirrors the kernel sysctl knob. */
int sysctl_sched_util_low_pct = 85;
static inline bool prefer_cpus_valid(struct cpumask *prefer_cpus,
struct cpumask *cpus_allowed)
{
return !libbpf_cpumask_empty(prefer_cpus) &&
!libbpf_cpumask_equal(prefer_cpus, cpus_allowed) &&
libbpf_cpumask_subset(prefer_cpus, cpus_allowed);
}
/*
 * Decide whether the task should be confined to 'prefer_cpus'.
 *
 * Returns 'prefer_cpus' when that set still has headroom: either some
 * CPU in it is idle, or its aggregate utilization stays below
 * sysctl_sched_util_low_pct percent of its aggregate capacity.
 * Otherwise falls back to the task's full affinity mask p->cpus_ptr.
 *
 * Side effect: *idlest_cpu records the best candidate inside
 * 'prefer_cpus' — an idle CPU if one was seen, else the CPU with the
 * largest spare capacity — for use by the exit hook as a fallback.
 */
static struct cpumask *select_better_cpus(struct task_struct *p,
					  struct cpumask *prefer_cpus,
					  int *idlest_cpu)
{
	unsigned long util_avg_sum = 0;
	unsigned long tg_capacity = 0;
	unsigned int weight;
	long min_util = INT_MIN;	/* despite the name, tracks the MAX spare capacity seen */
	struct task_group *tg;
	long spare;
	int cpu;

	/* prefer_cpus must be a non-empty strict subset of cpus_allowed. */
	if (!prefer_cpus_valid(prefer_cpus, (void *)getVal(p->cpus_ptr)))
		return (void *)getVal(p->cpus_ptr);

	tg = p->sched_task_group;	/* NOTE(review): assigned but never used */
	libbpf_for_each_cpu(cpu, prefer_cpus) {
		/* Remember the best candidate CPU for the exit hook. */
		if (idlest_cpu && libbpf_available_idle_cpu(cpu)) {
			*idlest_cpu = cpu;
		} else if (idlest_cpu) {
			spare = (long)(libbpf_capacity_of(cpu) - libbpf_cfs_util_avg_of(cpu));
			if (spare > min_util) {
				min_util = spare;
				*idlest_cpu = cpu;
			}
		}

		/* Any idle preferred CPU is enough to commit to the set. */
		if (libbpf_available_idle_cpu(cpu))
			return getVal(prefer_cpus);

		util_avg_sum += libbpf_cfs_util_avg_of(cpu);
		tg_capacity += libbpf_capacity_of(cpu);
	}

	/*
	 * No idle CPU: commit only if total utilization is under the
	 * low-watermark percentage of total capacity.
	 */
	weight = libbpf_cpumask_weight(prefer_cpus);
	if (tg_capacity > weight &&
	    util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) {
		return getVal(prefer_cpus);
	}

	return (void *)getVal(p->cpus_ptr);
}
/*
 * Core-selection hook implementing dynamic CPU affinity by tag: a task
 * tagged TAG_ID(1)/TAG_ID(2) is steered onto the CPU range configured
 * for that tag in tag_tbl ("0-3" / "4-7").
 *
 * Always returns SELECT_RQ_RANGE (-1) — presumably the sentinel telling
 * the kernel to keep running its own selection, now over the narrowed
 * cpus_ptr (TODO confirm against the cfs_select_rq hook contract).
 * The cpus_ptr change is undone by 'cfs_select_cpu_range_exit' below.
 */
SEC("sched/cfs_select_rq")
int BPF_PROG(cfs_select_cpu_range, struct sched_migrate_ctx *h_ctx)
{
	struct cpumask *prefer_cpus = getVal(h_ctx->select_idle_mask);
	struct task_struct *p = getVal(h_ctx->task);
	struct cpumask *cpus_ptr;
	int type = SELECT_RQ_RANGE;
	long tag = getVal(p->tag);
	int *idlest_cpu = 0;
	int key = 0;
	int ret;

	/* Untagged or out-of-range tasks keep the default behaviour. */
	if (tag <= TAG_NONE || tag >= TAG_MAX)
		return type;

	/* Parse the tag's cpulist string (e.g. "0-3") into a cpumask. */
	ret = libbpf_cpumask_cpulist_parse(tag_tbl[tag].buf, prefer_cpus);
	if (ret)
		return type;

	/* Per-CPU scratch slot where the idlest candidate is recorded. */
	idlest_cpu = bpf_map_lookup_elem(&map_idlest_cpu, &key);
	if (!idlest_cpu)
		return type;

	/* Narrow the task's allowed CPUs for this selection pass. */
	cpus_ptr = select_better_cpus(p, prefer_cpus, idlest_cpu);
	libbpf_sched_set_task_cpus_ptr((void *)h_ctx, getVal(cpus_ptr));

	return type;
}
/*
 * Exit hook paired with cfs_select_cpu_range: every path restores the
 * task's original affinity (h_ctx->cpus_allowed) before
 * 'select_task_rq_fair' returns.
 *
 * Return value: SELECT_RQ_EXIT_CPU_VALID (-2) accepts the kernel's
 * new_cpu; otherwise the idlest CPU recorded by the entry hook is
 * returned as the CPU to use instead.
 */
SEC("sched/cfs_select_rq_exit")
int BPF_PROG(cfs_select_cpu_range_exit, struct sched_migrate_ctx *h_ctx)
{
	int *idlest_cpu;
	int key = 0;

	idlest_cpu = bpf_map_lookup_elem(&map_idlest_cpu, &key);
	if (!idlest_cpu) {
		/* No scratch slot: restore affinity, accept new_cpu. */
		libbpf_sched_set_task_cpus_ptr(h_ctx, (void *)getVal(h_ctx->cpus_allowed));
		return SELECT_RQ_EXIT_CPU_VALID;
	}

	/*
	 * new_cpu fell outside the (narrowed) cpus_ptr: restore affinity
	 * and redirect the task to the recorded idlest candidate.
	 */
	if (!libbpf_cpumask_test_cpu(getVal(h_ctx->new_cpu),
				     (void *)getVal(h_ctx->task->cpus_ptr))) {
		libbpf_sched_set_task_cpus_ptr(h_ctx, (void *)getVal(h_ctx->cpus_allowed));
		return *idlest_cpu;
	}

	/* new_cpu is within the mask: restore affinity, keep it. */
	libbpf_sched_set_task_cpus_ptr(h_ctx, (void *)getVal(h_ctx->cpus_allowed));
	return SELECT_RQ_EXIT_CPU_VALID;
}
/*
 * Slow path (fork wakeups): return the allowed CPU with the smallest
 * CFS load average.
 *
 * NOTE(review): the 'parent' parameter is never used — confirm whether
 * it was meant to seed or bias the search.
 */
static int find_idlest_cpu(struct task_struct *p, int parent)
{
	unsigned long min = INT_MAX;	/* smallest load seen so far */
	int min_load_cpu = 0;
	unsigned long load;
	int cpu;
	int i;

	/*
	 * Walk p->cpus_ptr via libbpf_cpumask_next(); the NR_CPUS-bounded
	 * counter keeps the loop finite for the BPF verifier.
	 */
	for (i = 0, cpu = -1; i < NR_CPUS; i++) {
		cpu = libbpf_cpumask_next(cpu, (void *)getVal(p->cpus_ptr));
		if (cpu >= libbpf_nr_cpus_ids())
			break;

		load = libbpf_cfs_load_avg_of(cpu);
		if (load < min) {
			min = load;
			min_load_cpu = cpu;
		}
	}

	return min_load_cpu;
}
/*
 * Fast path: pick an idle CPU, preferring locality.
 *
 * Order of preference: the task's previous CPU if idle, then the
 * waker's CPU ('parent') if idle, then any idle CPU in the task's
 * allowed mask scanning from prev_cpu; finally fall back to prev_cpu.
 *
 * Fix: the original returned prev_cpu when 'parent' was idle, which
 * made the parent-idle check a dead no-op (prev_cpu was already known
 * to be busy at that point). Return 'parent' instead.
 */
static int select_idle_cpu(struct task_struct *p, int parent, int prev_cpu)
{
	int cpu;

	/* Previous CPU idle: cheapest choice, keeps caches warm. */
	if (libbpf_available_idle_cpu(prev_cpu))
		return prev_cpu;

	/* Waker's CPU idle: run there. */
	if (libbpf_available_idle_cpu(parent))
		return parent;

	/* Scan allowed CPUs, wrapping around starting near prev_cpu. */
	libbpf_for_each_cpu_wrap(cpu, (void *)getVal(p->cpus_ptr), prev_cpu) {
		if (libbpf_available_idle_cpu(cpu))
			return cpu;
	}

	/* Nothing idle: stay where we were. */
	return prev_cpu;
}
/*
 * Simple replacement core-selection policy:
 *   - fork wakeups take the slow path (least-loaded allowed CPU);
 *   - all other wakeups take the fast path (nearby idle CPU).
 *
 * Fix: WF_FORK is a bit flag, so test it with '&' rather than '=='.
 * Exact equality silently skips the fork path if any other WF_* bit
 * were ever set alongside WF_FORK.
 */
SEC("sched/cfs_select_rq")
int BPF_PROG(cfs_select_cpu, struct sched_migrate_ctx *h_ctx)
{
	struct task_struct *p = getVal(h_ctx->task);
	int wake_flags = getVal(h_ctx->wake_flags);
	int prev_cpu = getVal(h_ctx->prev_cpu);
	int cpu = getVal(h_ctx->curr_cpu);
	int new_cpu;

	if (wake_flags & WF_FORK) {
		/* Slow path */
		new_cpu = find_idlest_cpu(p, cpu);
	} else {
		/* Fast path */
		new_cpu = select_idle_cpu(p, cpu, prev_cpu);
	}

	return new_cpu;
}
/*
 * Wake-affine policy: decide between the wakee's previous CPU and the
 * waker's current CPU.
 *
 * If the waker's CPU is idle and shares cache with the previous CPU,
 * prefer the previous CPU when it too is idle, otherwise the waker's.
 * A sync wakeup with only the waker running also pulls the wakee to
 * the waker's CPU. In all other cases keep the previous CPU.
 */
SEC("sched/cfs_wake_affine")
int BPF_PROG(cfs_wake_affine, struct sched_affine_ctx *h_ctx)
{
	int waker_cpu = getVal(h_ctx->curr_cpu);
	int wakee_cpu = getVal(h_ctx->prev_cpu);
	int sync = getVal(h_ctx->is_sync);

	if (libbpf_available_idle_cpu(waker_cpu) &&
	    libbpf_cpus_share_cache(waker_cpu, wakee_cpu)) {
		if (libbpf_available_idle_cpu(wakee_cpu))
			return wakee_cpu;
		return waker_cpu;
	}

	if (sync && libbpf_nr_running_of(waker_cpu) == 1)
		return waker_cpu;

	return wakee_cpu;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
/* Print command-line help for this sample. */
static void usage(void)
{
	static const char * const help_text[] = {
		"USAGE: test sched select core [...]\n",
		" -W wakeup affine # Test sched wake wakeup\n",
		" -C select core # Test sched select core\n",
		" -R select core range # Test sched select core range\n",
		" -h # Display this help\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help_text) / sizeof(help_text[0]); i++)
		fputs(help_text[i], stdout);
}
#define TRACE_DIR "/sys/kernel/debug/tracing/"
#define BUF_SIZE (4096)

/* read trace logs from debug fs */
static void read_trace_pipe(void)
{
	int trace_fd;

	trace_fd = open(TRACE_DIR "trace_pipe", O_RDONLY, 0);
	if (trace_fd < 0)
		return;

	/*
	 * Loop forever — the sample exits via Ctrl+C. Reads on trace_pipe
	 * block until new trace data arrives.
	 */
	while (1) {
		static char buf[BUF_SIZE];
		ssize_t sz;

		/* Leave one byte free for the terminator below. */
		sz = read(trace_fd, buf, sizeof(buf) - 1);
		if (sz > 0) {
			/* NUL-terminate before printing as a C string. */
			buf[sz] = 0;
			puts(buf);
		}
	}
}
/*
 * Load <argv0>_kern.o, attach the program(s) selected on the command
 * line (-C, -W, or -R which needs both the entry and exit program),
 * then dump trace_pipe output until interrupted.
 *
 * Fix: progname was read uninitialized when the tool was run with no
 * option (undefined behavior); it is now zero-initialized and an
 * unset selection falls back to usage().
 */
int main(int argc, char **argv)
{
	int opt;
	char filename[256];
	char progname[4][256] = {{0}};	/* zero-init: empty means "not selected" */
	struct bpf_object *obj;
	struct bpf_program *prog[4] = {NULL};
	struct bpf_link *link[4] = {NULL};
	int prog_num = 1;
	int i = 0;

	while ((opt = getopt(argc, argv, "C::R::W::E::")) != -1) {
		switch (opt) {
		case 'C':
			snprintf(progname[0], sizeof(progname[0]), "cfs_select_cpu");
			break;
		case 'R':
			/* Range policy attaches the entry AND exit program. */
			snprintf(progname[0], sizeof(progname[0]), "cfs_select_cpu_range");
			snprintf(progname[1], sizeof(progname[1]), "cfs_select_cpu_range_exit");
			prog_num = 2;
			break;
		case 'W':
			snprintf(progname[0], sizeof(progname[0]), "cfs_wake_affine");
			break;
		default:
			usage();
			goto out;
		}
	}

	/* No option selected a program: nothing sensible to attach. */
	if (progname[0][0] == '\0') {
		usage();
		goto out;
	}

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	obj = bpf_object__open_file(filename, NULL);
	if (libbpf_get_error(obj)) {
		fprintf(stderr, "ERROR: opening BPF object file failed\n");
		goto out;
	}

	/* load BPF program */
	if (bpf_object__load(obj)) {
		fprintf(stderr, "ERROR: loading BPF object file failed\n");
		goto cleanup;
	}

	for (i = 0; i < prog_num; i++) {
		prog[i] = bpf_object__find_program_by_name(obj, progname[i]);
		if (libbpf_get_error(prog[i])) {
			fprintf(stderr, "ERROR: finding a prog %d in obj file failed\n", i);
			goto cleanup;
		}

		link[i] = bpf_program__attach(prog[i]);
		if (libbpf_get_error(link[i])) {
			fprintf(stderr, "ERROR: bpf_program__attach %d failed\n", i);
			link[i] = NULL;	/* don't destroy an error pointer */
			goto cleanup;
		}
	}

	printf("select rq BPF started, hit Ctrl+C to stop!\n");
	read_trace_pipe();

cleanup:
	/* Destroy links [i..0]; bpf_link__destroy(NULL) is a no-op. */
	for (; i >= 0; i--)
		bpf_link__destroy(link[i]);
	bpf_object__close(obj);
out:
	return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册