Unverified commit 9bf0bf6b, authored by openeuler-ci-bot, committed by Gitee

!279 sched: programmable: bpf support programmable schedule capacity for scheduler

Merge Pull Request from: @barry19901226 
 
This patchset supports programmable scheduling capacity. The main modifications are as follows:

1. Support the basic infrastructure for scheduler BPF.
2. Support loading and attaching scheduler BPF programs.
3. Support tag-based programmable scheduling capacity.
4. Support some basic helper functions.
5. Add a few more hooks in the CFS scheduler to check preemption, pick the next task, enqueue/dequeue tasks, and select a task's run queue.
6. Provide wrappers for the helper functions in libbpf.
 
Link: https://gitee.com/openeuler/kernel/pulls/279
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
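
For orientation, a scheduler BPF program written against this patchset might look roughly like the sketch below. It is not part of the patch: the SEC() prefix, the BPF_PROG() wrapper from bpf_tracing.h, and the availability of bpf_sched_task_tag_of() in the generated helper headers are assumptions based on the hook list and helper documentation added later in this diff.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only: prefer waking tasks with a higher tag.
 * Per the fair.c glue in this series, returning 1 requests preemption,
 * a negative value suppresses it, and 0 keeps the default CFS decision.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("sched/cfs_check_preempt_wakeup")   /* section name is an assumption */
int BPF_PROG(tag_preempt, struct task_struct *curr, struct task_struct *p)
{
    long curr_tag = bpf_sched_task_tag_of(curr);
    long wakee_tag = bpf_sched_task_tag_of(p);

    if (wakee_tag > curr_tag)
        return 1;    /* preempt the current task */
    if (wakee_tag < curr_tag)
        return -1;   /* suppress preemption for lower-tagged wakees */
    return 0;        /* no opinion: fall back to default behaviour */
}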
...@@ -236,6 +236,7 @@ CONFIG_KALLSYMS_ALL=y ...@@ -236,6 +236,7 @@ CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_KALLSYMS_BASE_RELATIVE=y
# CONFIG_BPF_LSM is not set # CONFIG_BPF_LSM is not set
CONFIG_BPF_SCHED=y
CONFIG_BPF_SYSCALL=y CONFIG_BPF_SYSCALL=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_ALWAYS_ON=y
......
...@@ -3646,6 +3646,68 @@ static const struct inode_operations proc_tid_comm_inode_operations = { ...@@ -3646,6 +3646,68 @@ static const struct inode_operations proc_tid_comm_inode_operations = {
.permission = proc_tid_comm_permission, .permission = proc_tid_comm_permission,
}; };
#ifdef CONFIG_BPF_SCHED
static ssize_t pid_tag_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
struct inode *inode = file_inode(file);
struct task_struct *tsk;
char buffer[PROC_NUMBUF];
int err = 0, tag = 0;
tsk = get_proc_task(inode);
if (!tsk)
return -ESRCH;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoint(strstrip(buffer), 0, &tag);
if (err)
goto out;
sched_settag(tsk, tag);
out:
put_task_struct(tsk);
return err < 0 ? err : count;
}
static int pid_tag_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct task_struct *tsk;
tsk = get_proc_task(inode);
if (!tsk)
return -ESRCH;
seq_printf(m, "%ld\n", tsk->tag);
put_task_struct(tsk);
return 0;
}
static int pid_tag_open(struct inode *inode, struct file *flip)
{
return single_open(flip, pid_tag_show, inode);
}
static const struct file_operations proc_pid_tag_operations = {
.open = pid_tag_open,
.read = seq_read,
.write = pid_tag_write,
.llseek = seq_lseek,
.release = single_release,
};
#endif
/* /*
* Tasks * Tasks
*/ */
...@@ -3755,6 +3817,9 @@ static const struct pid_entry tid_base_stuff[] = { ...@@ -3755,6 +3817,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL #ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", 0444, proc_sp_group_state), ONE("sp_group", 0444, proc_sp_group_state),
#endif #endif
#ifdef CONFIG_BPF_SCHED
REG("tag", 0644, proc_pid_tag_operations),
#endif
}; };
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
......
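
The hunk above registers the new file in tid_base_stuff, i.e. under the per-thread directory /proc/<pid>/task/<tid>/tag (whether it is also exposed directly as /proc/<pid>/tag is not visible in this excerpt). A minimal user-space sketch for driving it, with hypothetical names and no real error reporting:

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Write a tag value into /proc/<pid>/task/<tid>/tag for one thread. */
static int set_task_tag(pid_t pid, pid_t tid, long tag)
{
    char path[64], val[32];
    int fd, n;

    snprintf(path, sizeof(path), "/proc/%d/task/%d/tag", (int)pid, (int)tid);
    fd = open(path, O_WRONLY);
    if (fd < 0)
        return -1;

    n = snprintf(val, sizeof(val), "%ld", tag);
    if (write(fd, val, n) != n) {
        close(fd);
        return -1;
    }
    return close(fd);
}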
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_BPF_SCHED_H
#define _LINUX_BPF_SCHED_H
#include <linux/bpf.h>
#ifdef CONFIG_BPF_SCHED
#include <linux/jump_label.h>
#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) \
RET bpf_sched_##NAME(__VA_ARGS__);
#include <linux/sched_hook_defs.h>
#undef BPF_SCHED_HOOK
int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog);
DECLARE_STATIC_KEY_FALSE(bpf_sched_enabled_key);
static inline bool bpf_sched_enabled(void)
{
return static_branch_unlikely(&bpf_sched_enabled_key);
}
static inline void bpf_sched_inc(void)
{
static_branch_inc(&bpf_sched_enabled_key);
}
static inline void bpf_sched_dec(void)
{
static_branch_dec(&bpf_sched_enabled_key);
}
#else /* !CONFIG_BPF_SCHED */
static inline int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}
static inline bool bpf_sched_enabled(void)
{
return false;
}
#endif /* CONFIG_BPF_SCHED */
#endif /* _LINUX_BPF_SCHED_H */
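
To make the X-macro above concrete, here is roughly what a single entry from sched_hook_defs.h expands to; this is illustrative only, the real code is generated by the #include/#undef pairs in this header and in kernel/sched/bpf_sched.c.

/* Header side: a plain declaration per hook. */
int bpf_sched_cfs_check_preempt_wakeup(struct task_struct *curr,
                                       struct task_struct *p);

/* kernel/sched/bpf_sched.c side (shown later in this diff): a noinline stub
 * that returns the hook's default value and serves as the fentry-style
 * attach point for BPF_PROG_TYPE_SCHED programs.
 */
noinline int bpf_sched_cfs_check_preempt_wakeup(struct task_struct *curr,
                                                struct task_struct *p)
{
    return 0;
}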
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef _LINUX_BPF_TOPOLOGY_H
#define _LINUX_BPF_TOPOLOGY_H
#include <linux/cpumask.h>
struct bpf_cpu_topology {
int cpu;
int core_id;
int cluster_id;
int die_id;
int physical_package_id;
int numa_node;
struct cpumask thread_siblings;
struct cpumask core_siblings;
struct cpumask cluster_cpus;
struct cpumask die_cpus;
struct cpumask package_cpus;
struct cpumask node_cpu_lists;
};
struct bpf_cpumask_info {
unsigned int nums_possible_cpus;
unsigned int nums_active_cpus;
unsigned int nums_isolate_cpus;
unsigned int nr_cpu_ids;
unsigned int bpf_nr_cpumask_bits;
struct cpumask cpu_possible_cpumask;
struct cpumask cpu_active_cpumask;
struct cpumask cpu_isolate_cpumask;
};
#endif /* _LINUX_BPF_TOPOLOGY_H */
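
On the BPF side, bpf_init_cpu_topology() (added later in this series) fills a map with one bpf_cpu_topology entry per active CPU, keyed by CPU number. In a BPF object built against vmlinux.h and <bpf/bpf_helpers.h>, a map sized for that purpose could be declared as below; the map name and the NR_CPUS_GUESS constant are assumptions.

#define NR_CPUS_GUESS 256   /* assumption: upper bound on nr_cpu_ids */

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, NR_CPUS_GUESS);
    __type(key, u32);
    __type(value, struct bpf_cpu_topology);
} cpu_topology_map SEC(".maps");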
...@@ -77,6 +77,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, ...@@ -77,6 +77,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
void *, void *) void *, void *)
#endif /* CONFIG_BPF_LSM */ #endif /* CONFIG_BPF_LSM */
#endif #endif
#ifdef CONFIG_BPF_SCHED
BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED, bpf_sched,
void *, void *)
#endif /* CONFIG_BPF_SCHED */
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
......
...@@ -1419,7 +1419,12 @@ struct task_struct { ...@@ -1419,7 +1419,12 @@ struct task_struct {
KABI_RESERVE(4) KABI_RESERVE(4)
KABI_RESERVE(5) KABI_RESERVE(5)
#endif #endif
#ifdef CONFIG_BPF_SCHED
/* Used to pad the tag of a task */
KABI_USE(6, long tag)
#else
KABI_RESERVE(6) KABI_RESERVE(6)
#endif
KABI_RESERVE(7) KABI_RESERVE(7)
KABI_RESERVE(8) KABI_RESERVE(8)
KABI_RESERVE(9) KABI_RESERVE(9)
...@@ -2204,4 +2209,96 @@ static inline int sched_qos_cpu_overload(void) ...@@ -2204,4 +2209,96 @@ static inline int sched_qos_cpu_overload(void)
return 0; return 0;
} }
#endif #endif
#ifdef CONFIG_BPF_SCHED
extern void sched_settag(struct task_struct *tsk, s64 tag);
struct bpf_sched_cpu_stats {
/* load/util */
unsigned long cfs_load_avg;
unsigned long cfs_runnable_avg;
unsigned long cfs_util_avg;
unsigned long rt_load_avg;
unsigned long rt_runnable_avg;
unsigned long rt_util_avg;
unsigned long irq_load_avg;
unsigned long irq_runnable_avg;
unsigned long irq_util_avg;
/* nr_running */
unsigned int nr_running;
unsigned int cfs_nr_running;
unsigned int cfs_h_nr_running;
unsigned int cfs_idle_h_nr_running;
unsigned int rt_nr_running;
unsigned int rr_nr_running;
/* idle statistics */
int available_idle;
unsigned int exit_latency;
unsigned long idle_stamp;
unsigned long avg_idle;
/* capacity */
unsigned long capacity;
unsigned long capacity_orig;
KABI_RESERVE(1)
KABI_RESERVE(2)
KABI_RESERVE(3)
KABI_RESERVE(4)
};
struct cpumask_op_args {
unsigned int op_type;
void *arg1;
void *arg2;
void *arg3;
void *arg4;
};
enum cpumask_op_type {
CPUMASK_EMPTY,
CPUMASK_AND,
CPUMASK_ANDNOT,
CPUMASK_SUBSET,
CPUMASK_EQUAL,
CPUMASK_TEST_CPU,
CPUMASK_COPY,
CPUMASK_WEIGHT,
CPUMASK_NEXT,
CPUMASK_NEXT_WRAP,
CPUMASK_NEXT_AND,
CPUMASK_CPULIST_PARSE
};
struct sched_migrate_ctx {
struct task_struct *task;
struct cpumask *select_idle_mask;
int prev_cpu;
int curr_cpu;
int is_sync;
int want_affine;
int wake_flags;
int sd_flag;
int new_cpu;
KABI_RESERVE(1)
KABI_RESERVE(2)
KABI_RESERVE(3)
KABI_RESERVE(4)
};
struct sched_affine_ctx {
struct task_struct *task;
int prev_cpu;
int curr_cpu;
int is_sync;
KABI_RESERVE(1)
KABI_RESERVE(2)
KABI_RESERVE(3)
KABI_RESERVE(4)
};
#endif
#endif #endif
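
As a small illustration of how the context structures above are meant to be consumed, a cfs_wake_affine hook could look roughly like the sketch below (not part of the patch; SEC() naming is an assumption). The return convention follows the fair.c glue later in this diff: a value in [0, nr_cpumask_bits) selects that CPU, anything else keeps the default wake_affine() logic.

SEC("sched/cfs_wake_affine")
int BPF_PROG(affine_sync, struct sched_affine_ctx *ctx)
{
    /* For synchronous wakeups, pull the wakee onto the waker's CPU. */
    if (ctx->is_sync)
        return ctx->curr_cpu;

    return -1;   /* out of range: keep the default behaviour */
}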
/* SPDX-License-Identifier: GPL-2.0 */
BPF_SCHED_HOOK(int, 0, cfs_check_preempt_tick, struct sched_entity *curr, unsigned long delta_exec)
BPF_SCHED_HOOK(int, 0, cfs_check_preempt_wakeup, struct task_struct *curr, struct task_struct *p)
BPF_SCHED_HOOK(int, 0, cfs_wakeup_preempt_entity, struct sched_entity *curr,
struct sched_entity *se)
BPF_SCHED_HOOK(int, 0, cfs_tag_pick_next_entity, struct sched_entity *curr,
struct sched_entity *next)
BPF_SCHED_HOOK(void, (void) 0, cfs_enqueue_task, struct rq *rq, struct task_struct *p)
BPF_SCHED_HOOK(void, (void) 0, cfs_dequeue_task, struct rq *rq, struct task_struct *p)
BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx)
BPF_SCHED_HOOK(int, -1, cfs_wake_affine, struct sched_affine_ctx *ctx)
BPF_SCHED_HOOK(int, -1, cfs_select_rq_exit, struct sched_migrate_ctx *ctx)
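
A sketch of a hook body for cfs_tag_pick_next_entity, using the entity helpers documented below: returning 1 makes entity_before() treat the first entity as running before the second, while 0 falls back to vruntime ordering. As before, the SEC() prefix and the generated helper prototypes are assumptions, not part of this patch.

SEC("sched/cfs_tag_pick_next_entity")
int BPF_PROG(pick_tagged, struct sched_entity *curr, struct sched_entity *next)
{
    struct task_struct *p;
    long curr_tag = 0, next_tag = 0;

    if (bpf_sched_entity_is_task(curr)) {
        p = bpf_sched_entity_to_task(curr);
        if (p)
            curr_tag = bpf_sched_task_tag_of(p);
    }
    if (bpf_sched_entity_is_task(next)) {
        p = bpf_sched_entity_to_task(next);
        if (p)
            next_tag = bpf_sched_task_tag_of(p);
    }

    /* Prefer the higher-tagged entity; a tie keeps the default ordering. */
    return curr_tag > next_tag ? 1 : 0;
}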
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/bpf_common.h> #include <linux/bpf_common.h>
#include <linux/kabi.h>
/* Extended instruction set based on top of classic BPF */ /* Extended instruction set based on top of classic BPF */
...@@ -199,6 +200,7 @@ enum bpf_prog_type { ...@@ -199,6 +200,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SK_LOOKUP,
KABI_EXTEND_ENUM(BPF_PROG_TYPE_SCHED)
}; };
enum bpf_attach_type { enum bpf_attach_type {
...@@ -240,6 +242,7 @@ enum bpf_attach_type { ...@@ -240,6 +242,7 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP, BPF_XDP_CPUMAP,
BPF_SK_LOOKUP, BPF_SK_LOOKUP,
BPF_XDP, BPF_XDP,
KABI_BROKEN_INSERT_ENUM(BPF_SCHED)
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
...@@ -3755,6 +3758,117 @@ union bpf_attr { ...@@ -3755,6 +3758,117 @@ union bpf_attr {
* Get Ipv4 origdst or replysrc. Works with IPv4. * Get Ipv4 origdst or replysrc. Works with IPv4.
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
*
* long bpf_sched_tg_tag_of(struct task_group *tg)
* Description
* Return task group tag of *tg* if CONFIG_CGROUP_SCHED enabled.
* The bpf prog obtains the tags to detect different workloads.
* Return
* Task group tag, if CONFIG_CGROUP_SCHED enabled, 0 as default tag, or
* a negative error in case of failure.
*
* long bpf_sched_task_tag_of(struct task_struct *tsk)
* Description
* Return task tag of *tsk*.The bpf prog obtains the tags to detect
* different workloads.
* Return
* Task tag, if used, 0 as default tag, or a negative error in case of failure.
*
* int bpf_sched_set_tg_tag(struct task_group *tg, s64 tag)
* Description
* Set tag to *tg* and its descendants.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_sched_set_task_tag(struct task_struct *tsk, s64 tag)
* Description
* Set tag to *tsk*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_sched_cpu_stats_of(int cpu, struct bpf_sched_cpu_stats *ctx, int len)
* Description
* Get multiple types of *cpu* statistics and store in *ctx*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_init_cpu_topology(struct bpf_map *map)
* Description
* Initializing the cpu topology which used for bpf prog.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_get_cpumask_info(struct bpf_map *map, struct bpf_cpumask_info *cpus)
* Description
* Get system cpus returned in *cpus*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_sched_entity_is_task(struct sched_entity *se)
* Description
* Checks whether the sched entity is a task.
* Return
* 1 if true, 0 otherwise.
*
* struct task_struct *bpf_sched_entity_to_task(struct sched_entity *se)
* Description
* Return task struct of *se* if se is a task.
* Return
* Task struct if se is a task, NULL otherwise.
*
* struct task_group *bpf_sched_entity_to_tg(struct sched_entity *se)
* Description
* Return task group of *se* if se is a task group.
* Return
* Task struct if se is a task group, NULL otherwise.
*
* int bpf_cpumask_op(struct cpumask_op_args *op, int len)
* Description
* A series of cpumask-related operations. Perform different
* operations base on *op*->type. User also need fill other
* *op* field base on *op*->type. *op*->type is one of them
*
* **CPUMASK_EMPTY**
* *(op->arg1) == 0 returned.
* **CPUMASK_AND**
* *(op->arg1) = *(op->arg2) & *(op->arg3)
* **CPUMASK_ANDNOT**
* *(op->arg1) = *(op->arg2) & ~*(op->arg3)
* **CPUMASK_SUBSET**
* *(op->arg1) & ~*(op->arg2) == 0 returned
* **CPUMASK_EQUAL**
* *(op->arg1) == *(op->arg2) returned
* **CPUMASK_TEST_CPU**
* test for a cpu *(int)(op->arg1) in *(op->arg2)
* returns 1 if *op*->arg1 is set in *op*->arg2, else returns 0
* **CPUMASK_COPY**
* *(op->arg1) = *(op->arg2), return 0 always
* **CPUMASK_WEIGHT**
* count of bits in *(op->arg1)
* **CPUMASK_NEXT**
* get the next cpu in *(struct cpumask *)(op->arg2)
* *(int *)(op->arg1): the cpu prior to the place to search
* **CPUMASK_NEXT_WRAP**
* helper to implement for_each_cpu_wrap
* @op->arg1: the cpu prior to the place to search
* @op->arg2: the cpumask pointer
* @op->arg3: the start point of the iteration
* @op->arg4: assume @op->arg1 crossing @op->arg3 terminates the iteration
* returns >= nr_cpu_ids on completion
* **CPUMASK_NEXT_AND**
* get the next cpu in *(op->arg1) & *(op->arg2)
* **CPUMASK_CPULIST_PARSE**
* extract a cpumask from a user string of ranges.
* (char *)op->arg1 -> (struct cpumask *)(op->arg2)
* 0 on success, or a negative error in case of failure.
* Return
* View above.
*
* int bpf_cpus_share_cache(int src_cpu, int dst_cpu)
* Description
* check src_cpu whether share cache with dst_cpu.
* Return
* yes 1, no 0.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -3915,6 +4029,18 @@ union bpf_attr { ...@@ -3915,6 +4029,18 @@ union bpf_attr {
FN(redirect_peer), \ FN(redirect_peer), \
FN(get_sockops_uid_gid), \ FN(get_sockops_uid_gid), \
FN(sk_original_addr), \ FN(sk_original_addr), \
FN(sched_tg_tag_of), \
FN(sched_task_tag_of), \
FN(sched_set_tg_tag), \
FN(sched_set_task_tag), \
FN(sched_cpu_stats_of), \
FN(init_cpu_topology), \
FN(get_cpumask_info), \
FN(sched_entity_is_task), \
FN(sched_entity_to_task), \
FN(sched_entity_to_tg), \
FN(cpumask_op), \
FN(cpus_share_cache), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
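
The bpf_cpumask_op() helper documented above multiplexes several cpumask primitives through a single call. A hedged BPF-side wrapper is sketched below, assuming the struct and enum definitions from this patch are visible via vmlinux.h and that the cpumasks involved live in BPF-accessible memory such as map values.

/* AND two cpumasks into dst, then return the weight of the result
 * (or a negative error from bpf_cpumask_op()).
 */
static __always_inline int cpumask_and_weight(struct cpumask *dst,
                                              struct cpumask *a,
                                              struct cpumask *b)
{
    struct cpumask_op_args op = {};
    int ret;

    op.op_type = CPUMASK_AND;
    op.arg1 = dst;
    op.arg2 = a;
    op.arg3 = b;
    ret = bpf_cpumask_op(&op, sizeof(op));
    if (ret < 0)
        return ret;

    op.op_type = CPUMASK_WEIGHT;
    op.arg1 = dst;
    op.arg2 = NULL;
    op.arg3 = NULL;
    return bpf_cpumask_op(&op, sizeof(op));
}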
...@@ -1760,6 +1760,16 @@ config BPF_LSM ...@@ -1760,6 +1760,16 @@ config BPF_LSM
If you are unsure how to answer this question, answer N. If you are unsure how to answer this question, answer N.
config BPF_SCHED
bool "SCHED Instrumentation with BPF"
depends on BPF_EVENTS
depends on BPF_SYSCALL
help
Enables instrumentation of the sched hooks with eBPF programs for
implementing dynamic scheduling policies.
If you are unsure how to answer this question, answer N.
config BPF_SYSCALL config BPF_SYSCALL
bool "Enable bpf() system call" bool "Enable bpf() system call"
select BPF select BPF
......
...@@ -216,6 +216,9 @@ struct task_struct init_task ...@@ -216,6 +216,9 @@ struct task_struct init_task
#endif #endif
#ifdef CONFIG_SECCOMP_FILTER #ifdef CONFIG_SECCOMP_FILTER
.seccomp = { .filter_count = ATOMIC_INIT(0) }, .seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
#ifdef CONFIG_BPF_SCHED
.tag = 0,
#endif #endif
._resvd = &init_task_struct_resvd, ._resvd = &init_task_struct_resvd,
}; };
......
...@@ -4479,6 +4479,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, ...@@ -4479,6 +4479,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return true; return true;
t = btf_type_by_id(btf, t->type); t = btf_type_by_id(btf, t->type);
break; break;
case BPF_SCHED:
case BPF_MODIFY_RETURN: case BPF_MODIFY_RETURN:
/* For now the BPF_MODIFY_RETURN can only be attached to /* For now the BPF_MODIFY_RETURN can only be attached to
* functions that return an int. * functions that return an int.
......
...@@ -658,6 +658,10 @@ const struct bpf_func_proto bpf_probe_read_user_proto __weak; ...@@ -658,6 +658,10 @@ const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_sched_tg_tag_of_proto __weak;
const struct bpf_func_proto bpf_sched_task_tag_of_proto __weak;
const struct bpf_func_proto bpf_sched_set_tg_tag_proto __weak;
const struct bpf_func_proto bpf_sched_set_task_tag_proto __weak;
const struct bpf_func_proto * const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id) bpf_base_func_proto(enum bpf_func_id func_id)
...@@ -697,6 +701,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) ...@@ -697,6 +701,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto; return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query: case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto; return &bpf_ringbuf_query_proto;
case BPF_FUNC_sched_tg_tag_of:
return &bpf_sched_tg_tag_of_proto;
case BPF_FUNC_sched_task_tag_of:
return &bpf_sched_task_tag_of_proto;
default: default:
break; break;
} }
...@@ -715,6 +723,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) ...@@ -715,6 +723,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_per_cpu_ptr_proto; return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_this_cpu_ptr: case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto; return &bpf_this_cpu_ptr_proto;
case BPF_FUNC_sched_set_tg_tag:
return &bpf_sched_set_tg_tag_proto;
case BPF_FUNC_sched_set_task_tag:
return &bpf_sched_set_task_tag_proto;
default: default:
break; break;
} }
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/bpf-netns.h> #include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h> #include <linux/rcupdate_trace.h>
#include <linux/bpf_sched.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
...@@ -1997,6 +1998,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, ...@@ -1997,6 +1998,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_STRUCT_OPS:
case BPF_PROG_TYPE_EXT: case BPF_PROG_TYPE_EXT:
case BPF_PROG_TYPE_SCHED:
break; break;
default: default:
return -EINVAL; return -EINVAL;
...@@ -2108,6 +2110,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) ...@@ -2108,6 +2110,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
case BPF_PROG_TYPE_EXT: /* extends any prog */ case BPF_PROG_TYPE_EXT: /* extends any prog */
case BPF_PROG_TYPE_SCHED:
return true; return true;
default: default:
return false; return false;
...@@ -2529,6 +2532,11 @@ static void bpf_tracing_link_release(struct bpf_link *link) ...@@ -2529,6 +2532,11 @@ static void bpf_tracing_link_release(struct bpf_link *link)
struct bpf_tracing_link *tr_link = struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link); container_of(link, struct bpf_tracing_link, link);
#ifdef CONFIG_BPF_SCHED
if (link->prog->type == BPF_PROG_TYPE_SCHED)
bpf_sched_dec();
#endif
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
tr_link->trampoline)); tr_link->trampoline));
...@@ -2608,6 +2616,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, ...@@ -2608,6 +2616,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
goto out_put_prog; goto out_put_prog;
} }
break; break;
case BPF_PROG_TYPE_SCHED:
if (prog->expected_attach_type != BPF_SCHED) {
err = -EINVAL;
goto out_put_prog;
}
break;
default: default:
err = -EINVAL; err = -EINVAL;
goto out_put_prog; goto out_put_prog;
...@@ -2710,6 +2724,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, ...@@ -2710,6 +2724,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
goto out_unlock; goto out_unlock;
} }
#ifdef CONFIG_BPF_SCHED
if (prog->type == BPF_PROG_TYPE_SCHED)
bpf_sched_inc();
#endif
link->tgt_prog = tgt_prog; link->tgt_prog = tgt_prog;
link->trampoline = tr; link->trampoline = tr;
...@@ -2838,6 +2857,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) ...@@ -2838,6 +2857,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_EXT: case BPF_PROG_TYPE_EXT:
case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_SCHED:
if (attr->raw_tracepoint.name) { if (attr->raw_tracepoint.name) {
/* The attach point for this category of programs /* The attach point for this category of programs
* should be specified via btf_id during program load. * should be specified via btf_id during program load.
......
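
The syscall.c changes above route BPF_PROG_TYPE_SCHED through the same tracing-style attach path as fentry/LSM programs: the target is fixed by attach_btf_id at load time, and BPF_RAW_TRACEPOINT_OPEN with no name creates the link and flips the static key. A minimal syscall-level sketch of that attach step, assuming prog_fd came from a successful BPF_PROG_LOAD of a SCHED program (the libbpf wrappers from later in this series would normally hide this):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sched_prog_attach(int prog_fd)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.raw_tracepoint.prog_fd = prog_fd;
    attr.raw_tracepoint.name = 0;   /* target comes from attach_btf_id */

    /* Returns a link fd on success, negative errno on failure. */
    return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}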
...@@ -357,6 +357,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) ...@@ -357,6 +357,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
switch (prog->expected_attach_type) { switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY: case BPF_TRACE_FENTRY:
return BPF_TRAMP_FENTRY; return BPF_TRAMP_FENTRY;
case BPF_SCHED:
case BPF_MODIFY_RETURN: case BPF_MODIFY_RETURN:
return BPF_TRAMP_MODIFY_RETURN; return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT: case BPF_TRACE_FEXIT:
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/error-injection.h> #include <linux/error-injection.h>
#include <linux/bpf_lsm.h> #include <linux/bpf_lsm.h>
#include <linux/btf_ids.h> #include <linux/btf_ids.h>
#include <linux/bpf_sched.h>
#include "disasm.h" #include "disasm.h"
...@@ -5018,10 +5019,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) ...@@ -5018,10 +5019,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
int i; int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false; return false;
if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
return false; return false;
} }
...@@ -12155,6 +12156,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, ...@@ -12155,6 +12156,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_LSM_MAC: case BPF_LSM_MAC:
case BPF_TRACE_FENTRY: case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT: case BPF_TRACE_FEXIT:
case BPF_SCHED:
if (!btf_type_is_func(t)) { if (!btf_type_is_func(t)) {
bpf_log(log, "attach_btf_id %u is not a function\n", bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id); btf_id);
...@@ -12260,7 +12262,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ...@@ -12260,7 +12262,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
if (prog->type != BPF_PROG_TYPE_TRACING && if (prog->type != BPF_PROG_TYPE_TRACING &&
prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_LSM &&
prog->type != BPF_PROG_TYPE_EXT) prog->type != BPF_PROG_TYPE_EXT &&
prog->type != BPF_PROG_TYPE_SCHED)
return 0; return 0;
ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info); ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
...@@ -12300,6 +12303,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ...@@ -12300,6 +12303,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return ret; return ret;
} }
if (prog->type == BPF_PROG_TYPE_SCHED) {
ret = bpf_sched_verify_prog(&env->log, prog);
if (ret < 0)
return ret;
}
key = bpf_trampoline_compute_key(tgt_prog, btf_id); key = bpf_trampoline_compute_key(tgt_prog, btf_id);
tr = bpf_trampoline_get(key, &tgt_info); tr = bpf_trampoline_get(key, &tgt_info);
if (!tr) if (!tr)
......
...@@ -37,3 +37,5 @@ obj-$(CONFIG_MEMBARRIER) += membarrier.o ...@@ -37,3 +37,5 @@ obj-$(CONFIG_MEMBARRIER) += membarrier.o
obj-$(CONFIG_CPU_ISOLATION) += isolation.o obj-$(CONFIG_CPU_ISOLATION) += isolation.o
obj-$(CONFIG_PSI) += psi.o obj-$(CONFIG_PSI) += psi.o
obj-$(CONFIG_SCHED_CORE) += core_sched.o obj-$(CONFIG_SCHED_CORE) += core_sched.o
obj-$(CONFIG_BPF_SCHED) += bpf_sched.o
obj-$(CONFIG_BPF_SCHED) += bpf_topology.o
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/cgroup.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_sched.h>
#include <linux/btf_ids.h>
#include <linux/bpf_topology.h>
#include "sched.h"
DEFINE_STATIC_KEY_FALSE(bpf_sched_enabled_key);
/*
* For every hook declare a nop function where a BPF program can be attached.
*/
#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) \
noinline RET bpf_sched_##NAME(__VA_ARGS__) \
{ \
return DEFAULT; \
}
#include <linux/sched_hook_defs.h>
#undef BPF_SCHED_HOOK
#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_sched_##NAME)
BTF_SET_START(bpf_sched_hooks)
#include <linux/sched_hook_defs.h>
#undef BPF_SCHED_HOOK
BTF_SET_END(bpf_sched_hooks)
const struct bpf_func_proto bpf_init_cpu_topology_proto __weak;
const struct bpf_func_proto bpf_get_cpumask_info_proto __weak;
int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
if (!prog->gpl_compatible) {
bpf_log(vlog,
"sched programs must have a GPL compatible license\n");
return -EINVAL;
}
if (!btf_id_set_contains(&bpf_sched_hooks, prog->aux->attach_btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
prog->aux->attach_btf_id, prog->aux->attach_func_name);
return -EINVAL;
}
return 0;
}
BPF_CALL_3(bpf_sched_cpu_stats_of, int, cpu,
struct bpf_sched_cpu_stats *, ctx,
int, len)
{
struct cpuidle_state *idle;
struct rq *rq;
if (len != sizeof(*ctx))
return -EINVAL;
if ((unsigned int)cpu >= nr_cpu_ids)
return -EINVAL;
rq = cpu_rq(cpu);
memset(ctx, 0, sizeof(struct bpf_sched_cpu_stats));
/* load/util */
#ifdef CONFIG_SMP
SCHED_WARN_ON(!rcu_read_lock_held());
ctx->cfs_load_avg = rq->cfs.avg.load_avg;
ctx->cfs_runnable_avg = rq->cfs.avg.runnable_avg;
ctx->cfs_util_avg = rq->cfs.avg.util_avg;
ctx->rt_load_avg = rq->avg_rt.load_avg;
ctx->rt_runnable_avg = rq->avg_rt.runnable_avg;
ctx->rt_util_avg = rq->avg_rt.util_avg;
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
ctx->irq_load_avg = rq->avg_irq.load_avg;
ctx->irq_runnable_avg = rq->avg_irq.runnable_avg;
ctx->irq_util_avg = rq->avg_irq.util_avg;
#endif
#endif
/* nr_running */
ctx->nr_running = rq->nr_running;
ctx->cfs_nr_running = rq->cfs.nr_running;
ctx->cfs_h_nr_running = rq->cfs.h_nr_running;
ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running;
ctx->rt_nr_running = rq->rt.rt_nr_running;
ctx->rr_nr_running = rq->rt.rr_nr_running;
/* idle statistics */
ctx->available_idle = available_idle_cpu(cpu);
idle = idle_get_state(rq);
if (idle)
ctx->exit_latency = idle->exit_latency;
#ifdef CONFIG_SMP
ctx->idle_stamp = rq->idle_stamp;
ctx->avg_idle = rq->avg_idle;
#endif
/* capacity */
#ifdef CONFIG_SMP
ctx->capacity = rq->cpu_capacity;
ctx->capacity_orig = rq->cpu_capacity_orig;
#endif
return 0;
}
static const struct bpf_func_proto bpf_sched_cpu_stats_of_proto = {
.func = bpf_sched_cpu_stats_of,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE,
};
BTF_ID_LIST_SINGLE(btf_sched_entity_ids, struct, sched_entity)
BTF_ID_LIST_SINGLE(btf_sched_task_ids, struct, task_struct)
BTF_ID_LIST_SINGLE(btf_sched_tg_ids, struct, task_group)
BPF_CALL_1(bpf_sched_entity_is_task, struct sched_entity *, se)
{
return entity_is_task(se) ? 1 : 0;
}
static const struct bpf_func_proto bpf_sched_entity_is_task_proto = {
.func = bpf_sched_entity_is_task,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_entity_ids[0],
};
BPF_CALL_1(bpf_sched_entity_to_task, struct sched_entity *, se)
{
if (entity_is_task(se)) {
struct task_struct *tsk = task_of(se);
return (unsigned long)tsk;
}
return (unsigned long)NULL;
}
static const struct bpf_func_proto bpf_sched_entity_to_task_proto = {
.func = bpf_sched_entity_to_task,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.ret_btf_id = &btf_sched_task_ids[0],
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_entity_ids[0],
};
BPF_CALL_1(bpf_sched_entity_to_tg, struct sched_entity *, se)
{
#if CONFIG_FAIR_GROUP_SCHED
if (!entity_is_task(se)) {
struct task_group *tg = group_cfs_rq(se)->tg;
return (unsigned long)tg;
}
#endif
return (unsigned long)NULL;
}
static const struct bpf_func_proto bpf_sched_entity_to_tg_proto = {
.func = bpf_sched_entity_to_tg,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.ret_btf_id = &btf_sched_tg_ids[0],
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_entity_ids[0],
};
BPF_CALL_2(bpf_cpumask_op, struct cpumask_op_args *, op, int, len)
{
int ret;
if (len != sizeof(*op) || !op->arg1)
return -EINVAL;
switch (op->op_type) {
case CPUMASK_EMPTY:
return cpumask_empty((const struct cpumask *)op->arg1);
case CPUMASK_AND:
if (!op->arg2 || !op->arg3)
return -EINVAL;
return cpumask_and((struct cpumask *)op->arg1,
(const struct cpumask *)op->arg2,
(const struct cpumask *)op->arg3);
case CPUMASK_ANDNOT:
if (!op->arg2 || !op->arg3)
return -EINVAL;
cpumask_andnot((struct cpumask *)op->arg1,
(const struct cpumask *)op->arg2,
(const struct cpumask *)op->arg3);
break;
case CPUMASK_SUBSET:
if (!op->arg2)
return -EINVAL;
return cpumask_subset((const struct cpumask *)op->arg1,
(const struct cpumask *)op->arg2);
case CPUMASK_EQUAL:
if (!op->arg2)
return -EINVAL;
return cpumask_equal((const struct cpumask *)op->arg1,
(const struct cpumask *)op->arg2);
case CPUMASK_TEST_CPU:
if (!op->arg2)
return -EINVAL;
return cpumask_test_cpu(*(int *)op->arg1, op->arg2);
case CPUMASK_COPY:
if (!op->arg2)
return -EINVAL;
cpumask_copy((struct cpumask *)op->arg1,
(const struct cpumask *)op->arg2);
break;
case CPUMASK_WEIGHT:
return cpumask_weight((const struct cpumask *)op->arg1);
case CPUMASK_NEXT:
if (!op->arg2)
return -EINVAL;
return cpumask_next(*(int *)op->arg1,
(const struct cpumask *)op->arg2);
case CPUMASK_NEXT_WRAP:
if (!op->arg2 || !op->arg3 || !op->arg4)
return -EINVAL;
return cpumask_next_wrap(*(int *)op->arg1,
(const struct cpumask *)op->arg2,
*(int *)op->arg3, *(int *)op->arg4);
case CPUMASK_NEXT_AND:
if (!op->arg2 || !op->arg3)
return -EINVAL;
return cpumask_next_and(*(int *)op->arg1,
(const struct cpumask *)op->arg2,
(const struct cpumask *)op->arg3);
case CPUMASK_CPULIST_PARSE:
if (!op->arg2)
return -EINVAL;
op->arg1 = (void *)strstrip((void *)op->arg1);
ret = cpulist_parse((void *)op->arg1,
(struct cpumask *)op->arg2);
return ret;
default:
return -EINVAL;
}
return 0;
}
static const struct bpf_func_proto bpf_cpumask_op_proto = {
.func = bpf_cpumask_op,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
};
BPF_CALL_2(bpf_cpus_share_cache, int, src_cpu, int, dst_cpu)
{
if ((unsigned int)src_cpu >= nr_cpu_ids ||
(unsigned int)dst_cpu >= nr_cpu_ids)
return 0;
return cpus_share_cache(src_cpu, dst_cpu);
}
static const struct bpf_func_proto bpf_cpus_share_cache_proto = {
.func = bpf_cpus_share_cache,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_trace_printk:
return bpf_get_trace_printk_proto();
case BPF_FUNC_sched_cpu_stats_of:
return &bpf_sched_cpu_stats_of_proto;
case BPF_FUNC_init_cpu_topology:
return &bpf_init_cpu_topology_proto;
case BPF_FUNC_get_cpumask_info:
return &bpf_get_cpumask_info_proto;
case BPF_FUNC_sched_entity_is_task:
return &bpf_sched_entity_is_task_proto;
case BPF_FUNC_sched_entity_to_task:
return &bpf_sched_entity_to_task_proto;
case BPF_FUNC_sched_entity_to_tg:
return &bpf_sched_entity_to_tg_proto;
case BPF_FUNC_cpumask_op:
return &bpf_cpumask_op_proto;
case BPF_FUNC_cpus_share_cache:
return &bpf_cpus_share_cache_proto;
default:
return bpf_base_func_proto(func_id);
}
}
const struct bpf_prog_ops bpf_sched_prog_ops = {
};
const struct bpf_verifier_ops bpf_sched_verifier_ops = {
.get_func_proto = bpf_sched_func_proto,
.is_valid_access = btf_ctx_access,
};
BPF_CALL_1(bpf_sched_tg_tag_of, struct task_group *, tg)
{
int ret = 0;
#ifdef CONFIG_CGROUP_SCHED
if (tg == NULL)
return -EINVAL;
ret = tg->tag;
#endif
return ret;
}
const struct bpf_func_proto bpf_sched_tg_tag_of_proto = {
.func = bpf_sched_tg_tag_of,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_tg_ids[0],
};
BPF_CALL_1(bpf_sched_task_tag_of, struct task_struct *, tsk)
{
if (tsk == NULL)
return -EINVAL;
return tsk->tag;
}
const struct bpf_func_proto bpf_sched_task_tag_of_proto = {
.func = bpf_sched_task_tag_of,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_task_ids[0],
};
BPF_CALL_2(bpf_sched_set_tg_tag, struct task_group *, tg, s64, tag)
{
#if CONFIG_CGROUP_SCHED
if (tg == NULL || tg == &root_task_group)
return -EINVAL;
if (tg->tag == tag)
return 0;
rcu_read_lock();
walk_tg_tree_from(tg, tg_change_tag, tg_nop, (void *)(&tag));
rcu_read_unlock();
return 0;
#endif
return -EPERM;
}
const struct bpf_func_proto bpf_sched_set_tg_tag_proto = {
.func = bpf_sched_set_tg_tag,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_tg_ids[0],
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_2(bpf_sched_set_task_tag, struct task_struct *, tsk, s64, tag)
{
if (tsk == NULL)
return -EINVAL;
sched_settag(tsk, tag);
return 0;
}
const struct bpf_func_proto bpf_sched_set_task_tag_proto = {
.func = bpf_sched_set_task_tag,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_sched_task_ids[0],
.arg2_type = ARG_ANYTHING,
};
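
As a usage illustration for bpf_sched_cpu_stats_of() implemented above, a BPF-side helper (hypothetical, intended to be called from one of the sched hook programs) could compare two candidate CPUs by their runnable CFS task count:

static __always_inline int less_loaded_cpu(int a, int b)
{
    struct bpf_sched_cpu_stats sa = {}, sb = {};

    /* On any error, fall back to the first candidate. */
    if (bpf_sched_cpu_stats_of(a, &sa, sizeof(sa)) ||
        bpf_sched_cpu_stats_of(b, &sb, sizeof(sb)))
        return a;

    return sa.cfs_h_nr_running <= sb.cfs_h_nr_running ? a : b;
}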
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_verifier.h>
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/bpf_topology.h>
#include <linux/sched/isolation.h>
static void bpf_update_cpu_topology(struct bpf_cpu_topology *cpu_topology, int cpu)
{
cpu_topology->cpu = cpu;
cpu_topology->core_id = topology_core_id(cpu);
cpu_topology->cluster_id = topology_cluster_id(cpu);
cpu_topology->die_id = topology_die_id(cpu);
cpu_topology->physical_package_id = topology_physical_package_id(cpu);
cpu_topology->numa_node = cpu_to_node(cpu);
cpumask_copy(&cpu_topology->thread_siblings, topology_sibling_cpumask(cpu));
cpumask_copy(&cpu_topology->core_siblings, topology_core_cpumask(cpu));
cpumask_copy(&cpu_topology->cluster_cpus, topology_cluster_cpumask(cpu));
cpumask_copy(&cpu_topology->die_cpus, topology_die_cpumask(cpu));
cpumask_copy(&cpu_topology->package_cpus, topology_core_cpumask(cpu));
cpumask_copy(&cpu_topology->node_cpu_lists, cpumask_of_node(cpu_to_node(cpu)));
}
BPF_CALL_1(bpf_init_cpu_topology, struct bpf_map *, map)
{
const struct cpumask *cpu_map = cpu_active_mask;
struct bpf_cpu_topology *topo;
int i = -1;
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
for_each_cpu(i, cpu_map) {
topo = map->ops->map_lookup_elem(map, &i);
if (!topo)
return -ENOMEM;
bpf_update_cpu_topology(topo, i);
}
return 0;
}
BTF_ID_LIST_SINGLE(bpf_cpu_topology_ids, struct, bpf_cpu_topology)
const struct bpf_func_proto bpf_init_cpu_topology_proto = {
.func = bpf_init_cpu_topology,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
};
BPF_CALL_2(bpf_get_cpumask_info, struct bpf_map *, map, struct bpf_cpumask_info *, cpus)
{
if (!cpus)
return -EINVAL;
cpumask_copy(&cpus->cpu_possible_cpumask, cpu_possible_mask);
cpumask_copy(&cpus->cpu_active_cpumask, cpu_active_mask);
cpumask_copy(&cpus->cpu_isolate_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
cpus->nums_possible_cpus = num_possible_cpus();
cpus->nums_active_cpus = num_active_cpus();
cpus->nums_isolate_cpus = cpumask_weight(&cpus->cpu_isolate_cpumask);
cpus->nr_cpu_ids = nr_cpu_ids;
cpus->bpf_nr_cpumask_bits = nr_cpumask_bits;
return 0;
}
const struct bpf_func_proto bpf_get_cpumask_info_proto = {
.func = bpf_get_cpumask_info,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
};
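
A sketch of how a BPF program might consume the two helpers above, reusing the hypothetical cpu_topology_map from the earlier sketch and adding a single-slot map for bpf_get_cpumask_info(), whose second argument must point into a map value. This is illustrative only; in practice the topology map would typically be initialized once from a setup program rather than on every hook invocation.

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 1);
    __type(key, u32);
    __type(value, struct bpf_cpumask_info);
} cpumask_info_map SEC(".maps");

SEC("sched/cfs_select_rq")   /* section name is an assumption */
int BPF_PROG(topo_probe, struct sched_migrate_ctx *ctx)
{
    struct bpf_cpumask_info *cpus;
    u32 key = 0;

    bpf_init_cpu_topology(&cpu_topology_map);

    cpus = bpf_map_lookup_elem(&cpumask_info_map, &key);
    if (cpus)
        bpf_get_cpumask_info(&cpumask_info_map, cpus);

    return -1;   /* gather data only; keep the default CPU selection */
}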
...@@ -3477,6 +3477,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -3477,6 +3477,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
p->wake_entry.u_flags = CSD_TYPE_TTWU; p->wake_entry.u_flags = CSD_TYPE_TTWU;
#endif #endif
#ifdef CONFIG_BPF_SCHED
p->tag = 0;
#endif
} }
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
...@@ -8600,6 +8603,13 @@ static void sched_free_group(struct task_group *tg) ...@@ -8600,6 +8603,13 @@ static void sched_free_group(struct task_group *tg)
kmem_cache_free(task_group_cache, tg); kmem_cache_free(task_group_cache, tg);
} }
#ifdef CONFIG_BPF_SCHED
static inline void tg_init_tag(struct task_group *tg, struct task_group *ptg)
{
tg->tag = ptg->tag;
}
#endif
/* allocate runqueue etc for a new task group */ /* allocate runqueue etc for a new task group */
struct task_group *sched_create_group(struct task_group *parent) struct task_group *sched_create_group(struct task_group *parent)
{ {
...@@ -8620,6 +8630,10 @@ struct task_group *sched_create_group(struct task_group *parent) ...@@ -8620,6 +8630,10 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent)) if (!alloc_rt_sched_group(tg, parent))
goto err; goto err;
#ifdef CONFIG_BPF_SCHED
tg_init_tag(tg, parent);
#endif
alloc_uclamp_sched_group(tg, parent); alloc_uclamp_sched_group(tg, parent);
return tg; return tg;
...@@ -8691,6 +8705,14 @@ static void sched_change_group(struct task_struct *tsk, int type) ...@@ -8691,6 +8705,14 @@ static void sched_change_group(struct task_struct *tsk, int type)
sched_change_qos_group(tsk, tg); sched_change_qos_group(tsk, tg);
#endif #endif
#ifdef CONFIG_BPF_SCHED
/*
* This function has cleared and restored the task status,
* so we do not need to dequeue and enqueue the task again.
*/
tsk->tag = tg->tag;
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->task_change_group) if (tsk->sched_class->task_change_group)
tsk->sched_class->task_change_group(tsk, type); tsk->sched_class->task_change_group(tsk, type);
...@@ -9463,6 +9485,80 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css, ...@@ -9463,6 +9485,80 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
} }
#endif #endif
#ifdef CONFIG_BPF_SCHED
void sched_settag(struct task_struct *tsk, s64 tag)
{
int queued, running, queue_flags =
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq_flags rf;
struct rq *rq;
if (tsk->tag == tag)
return;
rq = task_rq_lock(tsk, &rf);
running = task_current(rq, tsk);
queued = task_on_rq_queued(tsk);
update_rq_clock(rq);
if (queued)
dequeue_task(rq, tsk, queue_flags);
if (running)
put_prev_task(rq, tsk);
tsk->tag = tag;
if (queued)
enqueue_task(rq, tsk, queue_flags);
if (running)
set_next_task(rq, tsk);
task_rq_unlock(rq, tsk, &rf);
}
int tg_change_tag(struct task_group *tg, void *data)
{
struct css_task_iter it;
struct task_struct *tsk;
s64 tag = *(s64 *)data;
struct cgroup_subsys_state *css = &tg->css;
tg->tag = tag;
css_task_iter_start(css, 0, &it);
while ((tsk = css_task_iter_next(&it)))
sched_settag(tsk, tag);
css_task_iter_end(&it);
return 0;
}
static int cpu_tag_write(struct cgroup_subsys_state *css,
struct cftype *cftype, s64 tag)
{
struct task_group *tg = css_tg(css);
if (tg == &root_task_group)
return -EINVAL;
if (tg->tag == tag)
return 0;
rcu_read_lock();
walk_tg_tree_from(tg, tg_change_tag, tg_nop, (void *)(&tag));
rcu_read_unlock();
return 0;
}
static inline s64 cpu_tag_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return css_tg(css)->tag;
}
#endif
static struct cftype cpu_legacy_files[] = { static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
{ {
...@@ -9524,6 +9620,13 @@ static struct cftype cpu_legacy_files[] = { ...@@ -9524,6 +9620,13 @@ static struct cftype cpu_legacy_files[] = {
.read_s64 = cpu_qos_read, .read_s64 = cpu_qos_read,
.write_s64 = cpu_qos_write, .write_s64 = cpu_qos_write,
}, },
#endif
#ifdef CONFIG_BPF_SCHED
{
.name = "tag",
.read_s64 = cpu_tag_read,
.write_s64 = cpu_tag_write,
},
#endif #endif
{ } /* Terminate */ { } /* Terminate */
}; };
......
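
The cgroup side of the tag interface added above appears as a cpu.tag file on the cpu controller, and writing it walks the group's subtree, retagging every member task. A hypothetical user-space sketch (the cgroup v1 mount point is an assumption):

#include <stdio.h>

static int set_group_tag(const char *cgroup, long tag)
{
    char path[256];
    FILE *f;

    snprintf(path, sizeof(path), "/sys/fs/cgroup/cpu/%s/cpu.tag", cgroup);
    f = fopen(path, "w");
    if (!f)
        return -1;

    fprintf(f, "%ld\n", tag);
    return fclose(f);
}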
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/tracehook.h> #include <linux/tracehook.h>
#endif #endif
#include <linux/bpf_sched.h>
/* /*
* Targeted preemption latency for CPU-bound tasks: * Targeted preemption latency for CPU-bound tasks:
...@@ -508,6 +509,15 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime) ...@@ -508,6 +509,15 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
static inline bool entity_before(struct sched_entity *a, static inline bool entity_before(struct sched_entity *a,
struct sched_entity *b) struct sched_entity *b)
{ {
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
int ret = bpf_sched_cfs_tag_pick_next_entity(a, b);
if (ret == 1)
return 1;
}
#endif
return (s64)(a->vruntime - b->vruntime) < 0; return (s64)(a->vruntime - b->vruntime) < 0;
} }
...@@ -4434,6 +4444,21 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ...@@ -4434,6 +4444,21 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr); ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
int ret = bpf_sched_cfs_check_preempt_tick(curr, delta_exec);
if (ret < 0)
return;
else if (ret > 0) {
resched_curr(rq_of(cfs_rq));
clear_buddies(cfs_rq, curr);
return;
}
}
#endif
if (delta_exec > ideal_runtime) { if (delta_exec > ideal_runtime) {
resched_curr(rq_of(cfs_rq)); resched_curr(rq_of(cfs_rq));
/* /*
...@@ -5678,6 +5703,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -5678,6 +5703,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
assert_list_leaf_cfs_rq(rq); assert_list_leaf_cfs_rq(rq);
hrtick_update(rq); hrtick_update(rq);
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled())
bpf_sched_cfs_enqueue_task(rq, p);
#endif
} }
static void set_next_buddy(struct sched_entity *se); static void set_next_buddy(struct sched_entity *se);
...@@ -5752,6 +5782,11 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -5752,6 +5782,11 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
dequeue_throttle: dequeue_throttle:
util_est_update(&rq->cfs, p, task_sleep); util_est_update(&rq->cfs, p, task_sleep);
hrtick_update(rq); hrtick_update(rq);
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled())
bpf_sched_cfs_dequeue_task(rq, p);
#endif
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -5967,6 +6002,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, ...@@ -5967,6 +6002,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
{ {
int target = nr_cpumask_bits; int target = nr_cpumask_bits;
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
struct sched_affine_ctx ctx;
int ret;
ctx.task = p;
ctx.prev_cpu = prev_cpu;
ctx.curr_cpu = this_cpu;
ctx.is_sync = sync;
ret = bpf_sched_cfs_wake_affine(&ctx);
if (ret >= 0 && ret < nr_cpumask_bits)
return ret;
}
#endif
if (sched_feat(WA_IDLE)) if (sched_feat(WA_IDLE))
target = wake_affine_idle(this_cpu, prev_cpu, sync); target = wake_affine_idle(this_cpu, prev_cpu, sync);
...@@ -6851,6 +6902,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f ...@@ -6851,6 +6902,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int new_cpu = prev_cpu; int new_cpu = prev_cpu;
int want_affine = 0; int want_affine = 0;
int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
#ifdef CONFIG_BPF_SCHED
struct sched_migrate_ctx ctx;
cpumask_t *cpus_prev = NULL;
cpumask_t *cpus;
int ret;
#endif
time = schedstat_start_time(); time = schedstat_start_time();
...@@ -6872,6 +6929,32 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f ...@@ -6872,6 +6929,32 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
} }
rcu_read_lock(); rcu_read_lock();
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
ctx.task = p;
ctx.prev_cpu = prev_cpu;
ctx.curr_cpu = cpu;
ctx.is_sync = sync;
ctx.wake_flags = wake_flags;
ctx.want_affine = want_affine;
ctx.sd_flag = sd_flag;
ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask);
ret = bpf_sched_cfs_select_rq(&ctx);
if (ret >= 0) {
rcu_read_unlock();
return ret;
} else if (ret != -1) {
cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
if (cpumask_subset(cpus, p->cpus_ptr) &&
!cpumask_empty(cpus)) {
cpus_prev = (void *)p->cpus_ptr;
p->cpus_ptr = cpus;
}
}
}
#endif
for_each_domain(cpu, tmp) { for_each_domain(cpu, tmp) {
/* /*
* If both 'cpu' and 'prev_cpu' are part of this domain, * If both 'cpu' and 'prev_cpu' are part of this domain,
...@@ -6903,6 +6986,19 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f ...@@ -6903,6 +6986,19 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
if (want_affine) if (want_affine)
current->recent_used_cpu = cpu; current->recent_used_cpu = cpu;
} }
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
ctx.new_cpu = new_cpu;
ret = bpf_sched_cfs_select_rq_exit(&ctx);
if (ret >= 0)
new_cpu = ret;
if (cpus_prev)
p->cpus_ptr = cpus_prev;
}
#endif
rcu_read_unlock(); rcu_read_unlock();
schedstat_end_time(cpu_rq(cpu), time); schedstat_end_time(cpu_rq(cpu), time);
...@@ -7027,6 +7123,15 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) ...@@ -7027,6 +7123,15 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{ {
s64 gran, vdiff = curr->vruntime - se->vruntime; s64 gran, vdiff = curr->vruntime - se->vruntime;
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
int ret = bpf_sched_cfs_wakeup_preempt_entity(curr, se);
if (ret)
return ret;
}
#endif
if (vdiff <= 0) if (vdiff <= 0)
return -1; return -1;
...@@ -7113,6 +7218,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ ...@@ -7113,6 +7218,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
likely(!task_has_idle_policy(p))) likely(!task_has_idle_policy(p)))
goto preempt; goto preempt;
#ifdef CONFIG_BPF_SCHED
if (bpf_sched_enabled()) {
int ret = bpf_sched_cfs_check_preempt_wakeup(current, p);
if (ret < 0)
return;
else if (ret > 0)
goto preempt;
}
#endif
/* /*
* Batch and idle tasks do not preempt non-idle tasks (their preemption * Batch and idle tasks do not preempt non-idle tasks (their preemption
* is driven by the tick): * is driven by the tick):
......
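
Tying the fair.c glue above together, a cfs_select_rq hook might look like the sketch below: a return value >= 0 is taken as the chosen CPU, -1 defers to the default selection path, and any other negative value restricts the search to select_idle_mask (not used here). SEC() naming and the generated helper prototypes remain assumptions.

SEC("sched/cfs_select_rq")
int BPF_PROG(sticky_select_rq, struct sched_migrate_ctx *ctx)
{
    /* Keep tagged tasks on their previous CPU while it still shares a
     * cache with the waker's CPU; otherwise stay out of the way.
     */
    if (bpf_sched_task_tag_of(ctx->task) <= 0)
        return -1;

    if (bpf_cpus_share_cache(ctx->prev_cpu, ctx->curr_cpu))
        return ctx->prev_cpu;

    return -1;
}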
...@@ -454,7 +454,12 @@ struct task_group { ...@@ -454,7 +454,12 @@ struct task_group {
struct uclamp_se uclamp[UCLAMP_CNT]; struct uclamp_se uclamp[UCLAMP_CNT];
#endif #endif
#ifdef CONFIG_BPF_SCHED
/* Used to pad the tag of a group */
KABI_USE(1, long tag)
#else
KABI_RESERVE(1) KABI_RESERVE(1)
#endif
KABI_RESERVE(2) KABI_RESERVE(2)
KABI_RESERVE(3) KABI_RESERVE(3)
KABI_RESERVE(4) KABI_RESERVE(4)
...@@ -492,6 +497,9 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) ...@@ -492,6 +497,9 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
} }
extern int tg_nop(struct task_group *tg, void *data); extern int tg_nop(struct task_group *tg, void *data);
#ifdef CONFIG_BPF_SCHED
extern int tg_change_tag(struct task_group *tg, void *data);
#endif
extern void free_fair_sched_group(struct task_group *tg); extern void free_fair_sched_group(struct task_group *tg);
extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
......
...@@ -435,6 +435,15 @@ class PrinterHelpers(Printer): ...@@ -435,6 +435,15 @@ class PrinterHelpers(Printer):
'struct xdp_md', 'struct xdp_md',
'struct path', 'struct path',
'struct btf_ptr', 'struct btf_ptr',
'struct task_group',
'struct bpf_sched_cpu_stats',
'struct bpf_cpu_topology',
'struct bpf_cpumask_info',
'struct sched_entity',
'struct cpumask',
'struct cpumask_op_args',
'struct sched_migrate_ctx',
'struct sched_affine_ctx',
] ]
known_types = { known_types = {
'...', '...',
...@@ -478,6 +487,15 @@ class PrinterHelpers(Printer): ...@@ -478,6 +487,15 @@ class PrinterHelpers(Printer):
'struct task_struct', 'struct task_struct',
'struct path', 'struct path',
'struct btf_ptr', 'struct btf_ptr',
'struct task_group',
'struct bpf_sched_cpu_stats',
'struct bpf_cpu_topology',
'struct bpf_cpumask_info',
'struct sched_entity',
'struct cpumask',
'struct cpumask_op_args',
'struct sched_migrate_ctx',
'struct sched_affine_ctx',
} }
mapped_types = { mapped_types = {
'u8': '__u8', 'u8': '__u8',
......
...@@ -66,6 +66,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { ...@@ -66,6 +66,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_MODIFY_RETURN] = "mod_ret", [BPF_MODIFY_RETURN] = "mod_ret",
[BPF_LSM_MAC] = "lsm_mac", [BPF_LSM_MAC] = "lsm_mac",
[BPF_SK_LOOKUP] = "sk_lookup", [BPF_SK_LOOKUP] = "sk_lookup",
[BPF_SCHED] = "sched",
}; };
void p_err(const char *fmt, ...) void p_err(const char *fmt, ...)
......
...@@ -64,6 +64,7 @@ const char * const prog_type_name[] = { ...@@ -64,6 +64,7 @@ const char * const prog_type_name[] = {
[BPF_PROG_TYPE_EXT] = "ext", [BPF_PROG_TYPE_EXT] = "ext",
[BPF_PROG_TYPE_LSM] = "lsm", [BPF_PROG_TYPE_LSM] = "lsm",
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
[BPF_PROG_TYPE_SCHED] = "sched",
}; };
const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
......
...@@ -199,6 +199,9 @@ enum bpf_prog_type { ...@@ -199,6 +199,9 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SK_LOOKUP,
#ifndef __GENKSYMS__
BPF_PROG_TYPE_SCHED,
#endif
}; };
enum bpf_attach_type { enum bpf_attach_type {
...@@ -240,6 +243,9 @@ enum bpf_attach_type { ...@@ -240,6 +243,9 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP, BPF_XDP_CPUMAP,
BPF_SK_LOOKUP, BPF_SK_LOOKUP,
BPF_XDP, BPF_XDP,
#ifndef __GENKSYMS__
BPF_SCHED,
#endif
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
...@@ -3755,6 +3761,117 @@ union bpf_attr { ...@@ -3755,6 +3761,117 @@ union bpf_attr {
* Get Ipv4 origdst or replysrc. Works with IPv4. * Get Ipv4 origdst or replysrc. Works with IPv4.
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
*
* long bpf_sched_tg_tag_of(struct task_group *tg)
* Description
* Return task group tag of *tg* if CONFIG_CGROUP_SCHED enabled.
* The bpf prog obtains the tags to detect different workloads.
* Return
* Task group tag, if CONFIG_CGROUP_SCHED enabled, 0 as default tag, or
* a negative error in case of failure.
*
* long bpf_sched_task_tag_of(struct task_struct *tsk)
* Description
* Return task tag of *tsk*.The bpf prog obtains the tags to detect
* different workloads.
* Return
* Task tag, if used, 0 as default tag, or a negative error in case of failure.
*
* int bpf_sched_set_tg_tag(struct task_group *tg, s64 tag)
* Description
* Set tag to *tg* and its descendants.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_sched_set_task_tag(struct task_struct *tsk, s64 tag)
* Description
* Set tag to *tsk*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_sched_cpu_stats_of(int cpu, struct bpf_sched_cpu_stats *ctx, int len)
* Description
* Get multiple types of *cpu* statistics and store in *ctx*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_init_cpu_topology(struct bpf_map *map)
* Description
* Initializing the cpu topology which used for bpf prog.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_get_cpumask_info(struct bpf_map *map, struct bpf_cpumask_info *cpus)
* Description
* Get system cpus returned in *cpus*.
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_sched_entity_is_task(struct sched_entity *se)
* Description
* Checks whether the sched entity is a task.
* Return
* 1 if true, 0 otherwise.
*
* struct task_struct *bpf_sched_entity_to_task(struct sched_entity *se)
* Description
* Return task struct of *se* if se is a task.
* Return
* Task struct if se is a task, NULL otherwise.
*
* struct task_group *bpf_sched_entity_to_tg(struct sched_entity *se)
* Description
* Return task group of *se* if se is a task group.
* Return
* Task struct if se is a task group, NULL otherwise.
*
* int bpf_cpumask_op(struct cpumask_op_args *op, int len)
* Description
* A series of cpumask-related operations. Perform different
* operations base on *op*->type. User also need fill other
* *op* field base on *op*->type. *op*->type is one of them
*
* **CPUMASK_EMPTY**
* *(op->arg1) == 0 returned.
* **CPUMASK_AND**
* *(op->arg1) = *(op->arg2) & *(op->arg3)
* **CPUMASK_ANDNOT**
* *(op->arg1) = *(op->arg2) & ~*(op->arg3)
* **CPUMASK_SUBSET**
* *(op->arg1) & ~*(op->arg2) == 0 returned
* **CPUMASK_EQUAL**
* *(op->arg1) == *(op->arg2) returned
* **CPUMASK_TEST_CPU**
* test for a cpu *(int)(op->arg1) in *(op->arg2)
* returns 1 if *op*->arg1 is set in *op*->arg2, else returns 0
* **CPUMASK_COPY**
* *(op->arg1) = *(op->arg2), return 0 always
* **CPUMASK_WEIGHT**
* count of bits in *(op->arg1)
* **CPUMASK_NEXT**
* get the next cpu in *(struct cpumask *)(op->arg2)
* *(int *)(op->arg1): the cpu prior to the place to search
* **CPUMASK_NEXT_WRAP**
* helper to implement for_each_cpu_wrap
* @op->arg1: the cpu prior to the place to search
* @op->arg2: the cpumask pointer
* @op->arg3: the start point of the iteration
* @op->arg4: assume @op->arg1 crossing @op->arg3 terminates the iteration
* returns >= nr_cpu_ids on completion
* **CPUMASK_NEXT_AND**
* get the next cpu in *(op->arg1) & *(op->arg2)
* **CPUMASK_CPULIST_PARSE**
* extract a cpumask from a user string of ranges.
* (char *)op->arg1 -> (struct cpumask *)(op->arg2)
* 0 on success, or a negative error in case of failure.
* Return
* View above.
*
* int bpf_cpus_share_cache(int src_cpu, int dst_cpu)
* Description
* check src_cpu whether share cache with dst_cpu.
* Return
* true yes, false no.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -3915,6 +4032,18 @@ union bpf_attr { ...@@ -3915,6 +4032,18 @@ union bpf_attr {
FN(redirect_peer), \ FN(redirect_peer), \
FN(get_sockops_uid_gid), \ FN(get_sockops_uid_gid), \
FN(sk_original_addr), \ FN(sk_original_addr), \
FN(sched_tg_tag_of), \
FN(sched_task_tag_of), \
FN(sched_set_tg_tag), \
FN(sched_set_task_tag), \
FN(sched_cpu_stats_of), \
FN(init_cpu_topology), \
FN(get_cpumask_info), \
FN(sched_entity_is_task), \
FN(sched_entity_to_task), \
FN(sched_entity_to_tg), \
FN(cpumask_op), \
FN(cpus_share_cache), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
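For reference, the sketch below shows how a scheduler BPF program might call the helpers documented above. It is a minimal sketch, not part of the patchset: the hook name "sched/cfs_check_preempt_wakeup", its argument list, and the return convention (positive to request preemption, negative to suppress it, 0 for the default CFS decision) are assumptions for illustration only; the helper signatures follow the descriptions above, and the build is assumed to use the patched UAPI header so the new helpers appear in bpf_helper_defs.h.
// SPDX-License-Identifier: GPL-2.0
/* Hedged sketch: hook name, arguments and return convention are assumed. */
#include "vmlinux.h"		/* generated: bpftool btf dump file /sys/kernel/btf/vmlinux format c */
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
SEC("sched/cfs_check_preempt_wakeup")	/* hypothetical hook name */
int BPF_PROG(tag_preempt, struct task_struct *curr, struct task_struct *p)
{
	/* Tags are assigned from user space or via bpf_sched_set_task_tag(). */
	long curr_tag = bpf_sched_task_tag_of(curr);
	long wakee_tag = bpf_sched_task_tag_of(p);
	if (wakee_tag > curr_tag)
		return 1;	/* assumed: ask CFS to preempt the current task */
	if (wakee_tag < curr_tag)
		return -1;	/* assumed: suppress the preemption */
	return 0;		/* assumed: keep the default CFS decision */
}
char LICENSE[] SEC("license") = "GPL";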
...@@ -236,7 +236,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, ...@@ -236,7 +236,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
attr.prog_type == BPF_PROG_TYPE_LSM) { attr.prog_type == BPF_PROG_TYPE_LSM) {
attr.attach_btf_id = load_attr->attach_btf_id; attr.attach_btf_id = load_attr->attach_btf_id;
} else if (attr.prog_type == BPF_PROG_TYPE_TRACING || } else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
attr.prog_type == BPF_PROG_TYPE_EXT) { attr.prog_type == BPF_PROG_TYPE_EXT ||
attr.prog_type == BPF_PROG_TYPE_SCHED) {
attr.attach_btf_id = load_attr->attach_btf_id; attr.attach_btf_id = load_attr->attach_btf_id;
attr.attach_prog_fd = load_attr->attach_prog_fd; attr.attach_prog_fd = load_attr->attach_prog_fd;
} else { } else {
......
...@@ -2504,7 +2504,8 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) ...@@ -2504,7 +2504,8 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
{ {
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
prog->type == BPF_PROG_TYPE_LSM) prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_SCHED)
return true; return true;
/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
...@@ -6722,7 +6723,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, ...@@ -6722,7 +6723,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
prog->type == BPF_PROG_TYPE_LSM) { prog->type == BPF_PROG_TYPE_LSM) {
load_attr.attach_btf_id = prog->attach_btf_id; load_attr.attach_btf_id = prog->attach_btf_id;
} else if (prog->type == BPF_PROG_TYPE_TRACING || } else if (prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_EXT) { prog->type == BPF_PROG_TYPE_EXT ||
prog->type == BPF_PROG_TYPE_SCHED) {
load_attr.attach_prog_fd = prog->attach_prog_fd; load_attr.attach_prog_fd = prog->attach_prog_fd;
load_attr.attach_btf_id = prog->attach_btf_id; load_attr.attach_btf_id = prog->attach_btf_id;
} else { } else {
...@@ -6829,7 +6831,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) ...@@ -6829,7 +6831,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if ((prog->type == BPF_PROG_TYPE_TRACING || if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { prog->type == BPF_PROG_TYPE_EXT ||
prog->type == BPF_PROG_TYPE_SCHED) && !prog->attach_btf_id) {
btf_id = libbpf_find_attach_btf_id(prog); btf_id = libbpf_find_attach_btf_id(prog);
if (btf_id <= 0) if (btf_id <= 0)
return btf_id; return btf_id;
...@@ -8254,6 +8257,7 @@ BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); ...@@ -8254,6 +8257,7 @@ BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
BPF_PROG_TYPE_FNS(sched, BPF_PROG_TYPE_SCHED);
enum bpf_attach_type enum bpf_attach_type
bpf_program__get_expected_attach_type(struct bpf_program *prog) bpf_program__get_expected_attach_type(struct bpf_program *prog)
...@@ -8318,6 +8322,8 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, ...@@ -8318,6 +8322,8 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
struct bpf_program *prog); struct bpf_program *prog);
static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
struct bpf_program *prog); struct bpf_program *prog);
static struct bpf_link *attach_sched(const struct bpf_sec_def *sec,
struct bpf_program *prog);
static const struct bpf_sec_def section_defs[] = { static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
...@@ -8386,6 +8392,10 @@ static const struct bpf_sec_def section_defs[] = { ...@@ -8386,6 +8392,10 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_ITER, .expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true, .is_attach_btf = true,
.attach_fn = attach_iter), .attach_fn = attach_iter),
SEC_DEF("sched/", SCHED,
.is_attach_btf = true,
.expected_attach_type = BPF_SCHED,
.attach_fn = attach_sched),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP), BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
...@@ -8469,7 +8479,7 @@ static const struct bpf_sec_def section_defs[] = { ...@@ -8469,7 +8479,7 @@ static const struct bpf_sec_def section_defs[] = {
#undef BPF_APROG_COMPAT #undef BPF_APROG_COMPAT
#undef SEC_DEF #undef SEC_DEF
#define MAX_TYPE_NAME_SIZE 32 #define MAX_TYPE_NAME_SIZE 31
static const struct bpf_sec_def *find_sec_def(const char *sec_name) static const struct bpf_sec_def *find_sec_def(const char *sec_name)
{ {
...@@ -8673,6 +8683,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, ...@@ -8673,6 +8683,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
#define BTF_TRACE_PREFIX "btf_trace_" #define BTF_TRACE_PREFIX "btf_trace_"
#define BTF_LSM_PREFIX "bpf_lsm_" #define BTF_LSM_PREFIX "bpf_lsm_"
#define BTF_ITER_PREFIX "bpf_iter_" #define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_SCHED_PREFIX "bpf_sched_"
#define BTF_MAX_NAME_SIZE 128 #define BTF_MAX_NAME_SIZE 128
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
...@@ -8706,6 +8717,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, ...@@ -8706,6 +8717,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
else if (attach_type == BPF_TRACE_ITER) else if (attach_type == BPF_TRACE_ITER)
err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name, err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
BTF_KIND_FUNC); BTF_KIND_FUNC);
else if (attach_type == BPF_SCHED)
err = find_btf_by_prefix_kind(btf, BTF_SCHED_PREFIX, name,
BTF_KIND_FUNC);
else else
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
...@@ -9685,6 +9699,11 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) ...@@ -9685,6 +9699,11 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return bpf_program__attach_btf_id(prog); return bpf_program__attach_btf_id(prog);
} }
struct bpf_link *bpf_program__attach_sched(struct bpf_program *prog)
{
return bpf_program__attach_btf_id(prog);
}
struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog) struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
{ {
return bpf_program__attach_btf_id(prog); return bpf_program__attach_btf_id(prog);
...@@ -9696,6 +9715,12 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, ...@@ -9696,6 +9715,12 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
return bpf_program__attach_trace(prog); return bpf_program__attach_trace(prog);
} }
static struct bpf_link *attach_sched(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
return bpf_program__attach_sched(prog);
}
static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
struct bpf_program *prog) struct bpf_program *prog)
{ {
......
...@@ -264,6 +264,8 @@ bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); ...@@ -264,6 +264,8 @@ bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
LIBBPF_API struct bpf_link * LIBBPF_API struct bpf_link *
bpf_program__attach_freplace(struct bpf_program *prog, bpf_program__attach_freplace(struct bpf_program *prog,
int target_fd, const char *attach_func_name); int target_fd, const char *attach_func_name);
LIBBPF_API struct bpf_link *
bpf_program__attach_sched(struct bpf_program *prog);
struct bpf_map; struct bpf_map;
...@@ -360,6 +362,7 @@ LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); ...@@ -360,6 +362,7 @@ LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sched(struct bpf_program *prog);
LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
...@@ -388,6 +391,7 @@ LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); ...@@ -388,6 +391,7 @@ LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sched(const struct bpf_program *prog);
/* /*
* No need for __attribute__((packed)), all members of 'bpf_map_def' * No need for __attribute__((packed)), all members of 'bpf_map_def'
......
...@@ -336,4 +336,7 @@ LIBBPF_0.2.0 { ...@@ -336,4 +336,7 @@ LIBBPF_0.2.0 {
perf_buffer__epoll_fd; perf_buffer__epoll_fd;
perf_buffer__consume_buffer; perf_buffer__consume_buffer;
xsk_socket__create_shared; xsk_socket__create_shared;
bpf_program__attach_sched;
bpf_program__is_sched;
bpf_program__set_sched;
} LIBBPF_0.1.0; } LIBBPF_0.1.0;
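On the user-space side, a minimal sketch of driving the libbpf additions above could look as follows. The object file name and error handling style are illustrative; only bpf_program__set_sched(), bpf_program__attach_sched() and the "sched/" section handling come from this patchset. Programs placed in a "sched/" ELF section are typed as BPF_PROG_TYPE_SCHED and the suffix is resolved against a kernel BTF function carrying the "bpf_sched_" prefix, so loading can fill in the attach BTF id automatically.
// SPDX-License-Identifier: GPL-2.0
/* Hedged sketch: "sched_prog.bpf.o" and the error handling style are assumptions. */
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>
int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	obj = bpf_object__open_file("sched_prog.bpf.o", NULL);
	if (libbpf_get_error(obj))
		return 1;
	/* SEC("sched/...") already marks the programs as BPF_PROG_TYPE_SCHED;
	 * bpf_program__set_sched() is only needed for other section names.
	 */
	if (bpf_object__load(obj))
		goto out;
	bpf_object__for_each_program(prog, obj) {
		link = bpf_program__attach_sched(prog);
		if (libbpf_get_error(link)) {
			fprintf(stderr, "failed to attach %s\n",
				bpf_program__section_name(prog));
			goto out;
		}
	}
	pause();	/* keep the links, and thus the policy, active */
out:
	bpf_object__close(obj);
	return 0;
}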
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __LIBBPF_LIBSCHED_H
#define __LIBBPF_LIBSCHED_H
#include <linux/bpf_topology.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* setting a larger value may cause the BPF verifier to fail */
#define BPF_SCHED_LOOP_MAX 1024
#define INVALID_PTR ((void *)(0UL))
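/* Read a value out of kernel memory with bpf_probe_read_kernel() so that
 * fields returned by the scheduler helpers can be used without a direct
 * dereference that the verifier may reject.
 */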
#define getVal(P) \
({ \
typeof(P) val = 0; \
bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
val; \
})
static __always_inline long libbpf_cpumask_next(int n, struct cpumask *mask);
static __always_inline long libbpf_cpumask_next_wrap(int n,
struct cpumask *mask,
int start, int wrap);
static __always_inline long libbpf_cpumask_next_and(int n,
struct cpumask *mask1,
struct cpumask *mask2);
static __always_inline int libbpf_nr_cpus_ids(void);
static __always_inline int libbpf_nr_cpumask_bits(void);
#if NR_CPUS == 1
#define libbpf_for_each_cpu(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define libbpf_for_each_cpu_wrap(cpu, mask, start) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
#define libbpf_for_each_cpu_and(cpu, mask1, mask2) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
#else
#define libbpf_for_each_cpu(cpu, mask) \
for (int __i = 0, (cpu) = -1; \
(cpu) = libbpf_cpumask_next((cpu), (mask)), \
(cpu) < libbpf_nr_cpus_ids() && __i < NR_CPUS; __i++)
#define libbpf_for_each_cpu_wrap(cpu, mask, start) \
for (int __i = 0, (cpu) = libbpf_cpumask_next_wrap((start) - 1,\
(mask), (start), false); \
(cpu) < libbpf_nr_cpumask_bits() && __i < NR_CPUS; \
(cpu) = libbpf_cpumask_next_wrap((cpu), (mask), (start),\
true), __i++)
#define libbpf_for_each_cpu_and(cpu, mask1, mask2) \
for (int __i = 0, (cpu) = -1; \
(cpu) = libbpf_cpumask_next_and((cpu), (mask1), (mask2)),\
(cpu) < libbpf_nr_cpus_ids() && __i < NR_CPUS; __i++)
#endif
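/* Single-entry percpu scratch map used by the wrappers below as the buffer
 * passed to bpf_get_cpumask_info() when querying the system cpumasks.
 */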
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
__type(value, struct bpf_cpumask_info);
__uint(max_entries, 1);
} map_cpumask_info SEC(".maps");
static __always_inline long libbpf_cpumask_copy(struct cpumask *dst,
struct cpumask *src)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_COPY;
op.arg1 = dst;
op.arg2 = src;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_empty(struct cpumask *mask)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_EMPTY;
op.arg1 = mask;
op.arg2 = INVALID_PTR;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_and(struct cpumask *dst,
struct cpumask *src1,
struct cpumask *src2)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_AND;
op.arg1 = dst;
op.arg2 = src1;
op.arg3 = src2;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_andnot(struct cpumask *dst,
struct cpumask *src1,
struct cpumask *src2)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_ANDNOT;
op.arg1 = dst;
op.arg2 = src1;
op.arg3 = src2;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_subset(struct cpumask *src1,
struct cpumask *src2)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_SUBSET;
op.arg1 = src1;
op.arg2 = src2;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_equal(struct cpumask *src1,
struct cpumask *src2)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_EQUAL;
op.arg1 = src1;
op.arg2 = src2;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_weight(struct cpumask *src1)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_WEIGHT;
op.arg1 = src1;
op.arg2 = INVALID_PTR;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_test_cpu(int cpu,
struct cpumask *mask)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_TEST_CPU;
op.arg1 = &cpu;
op.arg2 = mask;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_next(int n, struct cpumask *mask)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_NEXT;
op.arg1 = &n;
op.arg2 = mask;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_next_wrap(int n,
struct cpumask *mask,
int start, int wrap)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_NEXT_WRAP;
op.arg1 = &n;
op.arg2 = mask;
op.arg3 = &start;
op.arg4 = &wrap;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_next_and(int n,
struct cpumask *mask1,
struct cpumask *mask2)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_NEXT_AND;
op.arg1 = &n;
op.arg2 = mask1;
op.arg3 = mask2;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline long libbpf_cpumask_cpulist_parse(char *src1,
struct cpumask *src2)
{
struct cpumask_op_args op;
op.op_type = CPUMASK_CPULIST_PARSE;
op.arg1 = src1;
op.arg2 = src2;
op.arg3 = INVALID_PTR;
op.arg4 = INVALID_PTR;
return bpf_cpumask_op(&op, sizeof(op));
}
static __always_inline int libbpf_num_active_cpus(void)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return -1;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
return getVal(cpus->nums_active_cpus);
}
static __always_inline int libbpf_num_possible_cpus(void)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return -1;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
return getVal(cpus->nums_possible_cpus);
}
static __always_inline void libbpf_possible_cpus_mask(struct cpumask *mask)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
libbpf_cpumask_copy(mask, &cpus->cpu_possible_cpumask);
}
static __always_inline void libbpf_active_cpus_mask(struct cpumask *mask)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
libbpf_cpumask_copy(mask, &cpus->cpu_active_cpumask);
}
static __always_inline void libbpf_isolate_cpus_mask(struct cpumask *mask)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
libbpf_cpumask_copy(mask, &cpus->cpu_isolate_cpumask);
}
static __always_inline int libbpf_nr_cpus_ids(void)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return -1;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
return getVal(cpus->nr_cpu_ids);
}
static __always_inline int libbpf_nr_cpumask_bits(void)
{
struct bpf_cpumask_info *cpus;
int key = 0;
cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
if (!cpus)
return -1;
bpf_get_cpumask_info(&map_cpumask_info, cpus);
return getVal(cpus->bpf_nr_cpumask_bits);
}
static __always_inline unsigned long libbpf_cfs_load_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return getVal(load.cfs_load_avg);
}
static __always_inline unsigned long libbpf_cfs_runnable_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return getVal(load.cfs_runnable_avg);
}
static __always_inline unsigned long libbpf_cfs_util_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return getVal(load.cfs_util_avg);
}
static __always_inline unsigned long libbpf_rt_load_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return load.rt_load_avg;
}
static __always_inline unsigned long libbpf_rt_runnable_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return load.rt_runnable_avg;
}
static __always_inline unsigned long libbpf_rt_util_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return load.rt_util_avg;
}
static __always_inline unsigned long libbpf_irq_load_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return load.irq_load_avg;
}
static __always_inline unsigned long libbpf_irq_util_avg_of(int cpu)
{
struct bpf_sched_cpu_stats load;
bpf_sched_cpu_stats_of(cpu, &load, sizeof(load));
return load.irq_util_avg;
}
static __always_inline unsigned int libbpf_nr_running_of(int cpu)
{
struct bpf_sched_cpu_stats running;
bpf_sched_cpu_stats_of(cpu, &running, sizeof(running));
return getVal(running.nr_running);
}
static __always_inline unsigned int libbpf_cfs_nr_running_of(int cpu)
{
struct bpf_sched_cpu_stats running;
bpf_sched_cpu_stats_of(cpu, &running, sizeof(running));
return getVal(running.cfs_nr_running);
}
static __always_inline unsigned int libbpf_cfs_h_nr_running_of(int cpu)
{
struct bpf_sched_cpu_stats running;
bpf_sched_cpu_stats_of(cpu, &running, sizeof(running));
return getVal(running.cfs_h_nr_running);
}
static __always_inline unsigned int libbpf_cfs_idle_h_nr_running_of(int cpu)
{
struct bpf_sched_cpu_stats running;
bpf_sched_cpu_stats_of(cpu, &running, sizeof(running));
return running.cfs_idle_h_nr_running;
}
static __always_inline unsigned int libbpf_rt_nr_running_of(int cpu)
{
struct bpf_sched_cpu_stats running;
bpf_sched_cpu_stats_of(cpu, &running, sizeof(running));
return getVal(running.rt_nr_running);
}
static __always_inline unsigned int libbpf_rr_nr_running_of(int cpu)
{
struct bpf_sched_cpu_stats running;
bpf_sched_cpu_stats_of(cpu, &running, sizeof(running));
return running.rr_nr_running;
}
static __always_inline unsigned int libbpf_exit_latency_of(int cpu)
{
struct bpf_sched_cpu_stats stat;
bpf_sched_cpu_stats_of(cpu, &stat, sizeof(stat));
return stat.exit_latency;
}
static __always_inline unsigned long libbpf_idle_stamp_of(int cpu)
{
struct bpf_sched_cpu_stats stat;
bpf_sched_cpu_stats_of(cpu, &stat, sizeof(stat));
return stat.idle_stamp;
}
static __always_inline unsigned long libbpf_avg_idle_of(int cpu)
{
struct bpf_sched_cpu_stats stat;
bpf_sched_cpu_stats_of(cpu, &stat, sizeof(stat));
return stat.avg_idle;
}
static __always_inline unsigned long libbpf_available_idle_cpu(int cpu)
{
struct bpf_sched_cpu_stats stat;
bpf_sched_cpu_stats_of(cpu, &stat, sizeof(stat));
return getVal(stat.available_idle);
}
static __always_inline unsigned long libbpf_capacity_of(int cpu)
{
struct bpf_sched_cpu_stats cap;
bpf_sched_cpu_stats_of(cpu, &cap, sizeof(cap));
return getVal(cap.capacity);
}
static __always_inline unsigned long libbpf_capacity_orig_of(int cpu)
{
struct bpf_sched_cpu_stats cap;
bpf_sched_cpu_stats_of(cpu, &cap, sizeof(cap));
return cap.capacity_orig;
}
static __always_inline int libbpf_cpus_share_cache(int src_cpu, int dst_cpu)
{
return bpf_cpus_share_cache(src_cpu, dst_cpu);
}
static __always_inline int libbpf_sched_se_tag_of(struct sched_entity *se)
{
int se_tag = 0;
if (bpf_sched_entity_is_task(se)) {
struct task_struct *task = bpf_sched_entity_to_task(se);
se_tag = bpf_sched_task_tag_of(task);
} else {
struct task_group *tg = bpf_sched_entity_to_tg(se);
se_tag = bpf_sched_tg_tag_of(tg);
}
return se_tag;
}
#endif
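To show how the wrappers above compose, here is a hedged sketch of a CPU-selection style program. The hook name "sched/cfs_select_rq", its arguments, and the convention that returning a CPU id (or a negative value for "no preference") steers task placement are assumptions for illustration; the wrapper calls are the ones defined in this header, and the build environment is assumed to provide the kernel headers that libsched.h and struct task_struct need (a samples/bpf-style build).
// SPDX-License-Identifier: GPL-2.0
/* Hedged sketch built on libsched.h; hook name and return semantics are assumed. */
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "libsched.h"		/* the wrapper header shown above */
SEC("sched/cfs_select_rq")	/* hypothetical hook name */
int BPF_PROG(pick_cpu, struct task_struct *p, int prev_cpu)
{
	unsigned long min_load = (unsigned long)-1;
	int nr = libbpf_nr_cpus_ids();
	int cpu, best = -1;
	for (cpu = 0; cpu < BPF_SCHED_LOOP_MAX; cpu++) {
		unsigned long load;
		if (cpu >= nr)
			break;
		/* An idle CPU that shares cache with prev_cpu is good enough. */
		if (libbpf_available_idle_cpu(cpu) &&
		    libbpf_cpus_share_cache(prev_cpu, cpu))
			return cpu;
		/* Otherwise remember the least CFS-loaded CPU seen so far. */
		load = libbpf_cfs_load_avg_of(cpu);
		if (load < min_load) {
			min_load = load;
			best = cpu;
		}
	}
	return best;	/* assumed: a negative value means "no preference" */
}
char LICENSE[] SEC("license") = "GPL";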