Commit ebeb84ad authored by tanghui, committed by Zhong Jinghua

cpuset: Introduce new interface for scheduler dynamic affinity

hulk inclusion
category: feature
bugzilla: 186575, https://gitee.com/openeuler/kernel/issues/I526XC

--------------------------------

Add a 'preferred_cpus' file and related interfaces to cgroup cpuset, along with a per-task 'preferred_cpuset' file under /proc, as the user interface for scheduler dynamic affinity.
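
A minimal usage sketch is shown below (illustrative only: the cgroup v1 mount
point /sys/fs/cgroup/cpuset, the child cpuset name "demo", and the write_str()
helper are assumptions for the example, not part of this patch):

  /* Sketch only: exercise the new preferred-CPUs files from userspace. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  static int write_str(const char *path, const char *val)
  {
          int fd = open(path, O_WRONLY);
          ssize_t n;

          if (fd < 0) {
                  perror(path);
                  return -1;
          }
          n = write(fd, val, strlen(val));
          close(fd);
          return n < 0 ? -1 : 0;
  }

  int main(void)
  {
          char path[64];

          /*
           * Per-cgroup preferred CPUs: the list must be a subset of the
           * cpuset's cpus, and the root cpuset rejects writes (-EACCES).
           * The mount point and "demo" group are assumed to exist.
           */
          write_str("/sys/fs/cgroup/cpuset/demo/cpuset.preferred_cpus", "0-3\n");

          /*
           * Per-thread preferred CPUs via the new proc entry; the list
           * must be a subset of the thread's allowed CPUs.
           */
          snprintf(path, sizeof(path), "/proc/%d/task/%d/preferred_cpuset",
                   getpid(), getpid());
          write_str(path, "0-1\n");
          return 0;
  }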
Signed-off-by: tanghui <tanghui20@huawei.com>
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Parent 229f5c1f
@@ -3251,6 +3251,76 @@ static const struct file_operations proc_setgroups_operations = {
};
#endif /* CONFIG_USER_NS */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
static int preferred_cpuset_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
if (p->prefer_cpus)
seq_printf(m, "%*pbl\n", cpumask_pr_args(p->prefer_cpus));
else
seq_putc(m, '\n');
put_task_struct(p);
return 0;
}
static ssize_t preferred_cpuset_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
cpumask_var_t new_mask;
int retval;
struct inode *inode = file_inode(file);
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
retval = -ENOMEM;
goto out_put_task;
}
retval = cpumask_parselist_user(buf, count, new_mask);
if (retval < 0)
goto out_free_cpumask;
retval = set_prefer_cpus_ptr(p, new_mask);
if (retval < 0)
goto out_free_cpumask;
retval = count;
out_free_cpumask:
free_cpumask_var(new_mask);
out_put_task:
put_task_struct(p);
return retval;
}
static int preferred_cpuset_open(struct inode *inode, struct file *filp)
{
return single_open(filp, preferred_cpuset_show, inode);
}
static const struct file_operations proc_preferred_cpuset_operations = {
.open = preferred_cpuset_open,
.write = preferred_cpuset_write,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -3820,6 +3890,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_BPF_SCHED
REG("tag", 0644, proc_pid_tag_operations),
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
REG("preferred_cpuset", 0644, proc_preferred_cpuset_operations),
#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
......
@@ -1422,7 +1422,11 @@ struct task_struct {
KABI_RESERVE(6)
#endif
KABI_USE(7, void *pf_io_worker)
#if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY) && !defined(__GENKSYMS__)
KABI_USE(8, cpumask_t *prefer_cpus)
#else
KABI_RESERVE(8)
#endif
KABI_RESERVE(9)
KABI_RESERVE(10)
KABI_RESERVE(11)
@@ -2206,6 +2210,13 @@ static inline int sched_qos_cpu_overload(void)
}
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int set_prefer_cpus_ptr(struct task_struct *p,
const struct cpumask *new_mask);
int sched_prefer_cpus_fork(struct task_struct *p, struct cpumask *mask);
void sched_prefer_cpus_free(struct task_struct *p);
#endif
#ifdef CONFIG_BPF_SCHED
extern void sched_settag(struct task_struct *tsk, s64 tag);
......
@@ -214,6 +214,9 @@ struct task_struct init_task
#ifdef CONFIG_SECURITY
.security = NULL,
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
.prefer_cpus = NULL,
#endif
#ifdef CONFIG_SECCOMP_FILTER
.seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
......
@@ -107,6 +107,9 @@ struct cpuset {
/* user-configured CPUs and Memory Nodes allow to tasks */
cpumask_var_t cpus_allowed;
nodemask_t mems_allowed;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_var_t prefer_cpus;
#endif
/* effective CPUs and Memory Nodes allow to tasks */
cpumask_var_t effective_cpus;
@@ -193,6 +196,9 @@ struct cpuset {
struct tmpmasks {
cpumask_var_t addmask, delmask; /* For partition root */
cpumask_var_t new_cpus; /* For update_cpumasks_hier() */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_var_t prefer_cpus;
#endif
};
static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
@@ -472,15 +478,24 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
cpumask_var_t *pmask1, *pmask2, *pmask3;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_var_t *pmask4;
#endif
if (cs) {
pmask1 = &cs->cpus_allowed;
pmask2 = &cs->effective_cpus;
pmask3 = &cs->subparts_cpus;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
pmask4 = &cs->prefer_cpus;
#endif
} else {
pmask1 = &tmp->new_cpus;
pmask2 = &tmp->addmask;
pmask3 = &tmp->delmask;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
pmask4 = &tmp->prefer_cpus;
#endif
}
if (!zalloc_cpumask_var(pmask1, GFP_KERNEL))
@@ -491,9 +506,17 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
goto free_two;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!zalloc_cpumask_var(pmask4, GFP_KERNEL))
goto free_three;
#endif
return 0;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_three:
free_cpumask_var(*pmask3);
#endif
free_two:
free_cpumask_var(*pmask2);
free_one:
@@ -509,11 +532,17 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
if (cs) {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_cpumask_var(cs->prefer_cpus);
#endif
free_cpumask_var(cs->cpus_allowed);
free_cpumask_var(cs->effective_cpus);
free_cpumask_var(cs->subparts_cpus);
}
if (tmp) {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_cpumask_var(tmp->prefer_cpus);
#endif
free_cpumask_var(tmp->new_cpus);
free_cpumask_var(tmp->addmask);
free_cpumask_var(tmp->delmask);
@@ -537,6 +566,9 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
return NULL;
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_copy(trial->prefer_cpus, cs->prefer_cpus);
#endif
cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
cpumask_copy(trial->effective_cpus, cs->effective_cpus);
return trial;
@@ -580,6 +612,11 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
rcu_read_lock();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
ret = -EINVAL;
if (!cpumask_subset(cur->prefer_cpus, trial->cpus_allowed))
goto out;
#endif
/* Each of our child cpusets must be a subset of us */
ret = -EBUSY;
cpuset_for_each_child(c, css, cur)
@@ -644,6 +681,66 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
return ret;
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
static cpumask_var_t prefer_cpus_attach;
static void update_tasks_prefer_cpumask(struct cpuset *cs)
{
struct css_task_iter it;
struct task_struct *task;
css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it)))
set_prefer_cpus_ptr(task, cs->prefer_cpus);
css_task_iter_end(&it);
}
/*
* update_prefer_cpumask - update the prefer_cpus mask of a cpuset and
* all tasks in it
* @cs: the cpuset to consider
* @trialcs: trial cpuset
* @buf: buffer of cpu numbers written to this cpuset
*/
static int update_prefer_cpumask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
int retval;
if (cs == &top_cpuset)
return -EACCES;
/*
* An empty prefer_cpus is ok, which means that the cpuset's tasks have the
* dynamic affinity feature disabled.
* Since cpulist_parse() fails on an empty mask, we special case
* that parsing.
*/
if (!*buf) {
cpumask_clear(trialcs->prefer_cpus);
} else {
retval = cpulist_parse(buf, trialcs->prefer_cpus);
if (retval < 0)
return retval;
}
/* Nothing to do if the cpus didn't change */
if (cpumask_equal(cs->prefer_cpus, trialcs->prefer_cpus))
return 0;
if (!cpumask_subset(trialcs->prefer_cpus, cs->cpus_allowed))
return -EINVAL;
update_tasks_prefer_cpumask(trialcs);
spin_lock_irq(&callback_lock);
cpumask_copy(cs->prefer_cpus, trialcs->prefer_cpus);
spin_unlock_irq(&callback_lock);
return 0;
}
#endif
#ifdef CONFIG_SMP
/*
* Helper routine for generate_sched_domains().
@@ -2229,6 +2326,10 @@ static void cpuset_attach(struct cgroup_taskset *tset)
else
guarantee_online_cpus(cs, cpus_attach);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_copy(prefer_cpus_attach, cs->prefer_cpus);
#endif
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cgroup_taskset_for_each(task, css, tset) {
@@ -2237,6 +2338,9 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
set_prefer_cpus_ptr(task, prefer_cpus_attach);
#endif
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
@@ -2297,6 +2401,9 @@ typedef enum {
FILE_MEMORY_PRESSURE,
FILE_SPREAD_PAGE,
FILE_SPREAD_SLAB,
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
FILE_DYNAMIC_CPULIST,
#endif
} cpuset_filetype_t;
static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -2427,6 +2534,11 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
case FILE_MEMLIST:
retval = update_nodemask(cs, trialcs, buf);
break;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
case FILE_DYNAMIC_CPULIST:
retval = update_prefer_cpumask(cs, trialcs, buf);
break;
#endif
default:
retval = -EINVAL;
break;
@@ -2474,6 +2586,11 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
case FILE_SUBPARTS_CPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus));
break;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
case FILE_DYNAMIC_CPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->prefer_cpus));
break;
#endif
default:
ret = -EINVAL;
}
@@ -2681,7 +2798,15 @@ static struct cftype legacy_files[] = {
.write_u64 = cpuset_write_u64,
.private = FILE_MEMORY_PRESSURE_ENABLED,
},
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
{
.name = "preferred_cpus",
.seq_show = cpuset_common_seq_show,
.write = cpuset_write_resmask,
.max_write_len = (100U + 6 * NR_CPUS),
.private = FILE_DYNAMIC_CPULIST,
},
#endif
{ } /* terminate */
};
@@ -2830,6 +2955,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cs->effective_mems = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_copy(cs->prefer_cpus, parent->prefer_cpus);
#endif
spin_unlock_irq(&callback_lock);
out_unlock:
percpu_up_write(&cpuset_rwsem);
@@ -2912,6 +3040,9 @@ static void cpuset_fork(struct task_struct *task)
return;
set_cpus_allowed_ptr(task, current->cpus_ptr);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
set_prefer_cpus_ptr(task, current->prefer_cpus);
#endif
task->mems_allowed = current->mems_allowed;
}
@@ -2945,17 +3076,26 @@ int __init cpuset_init(void)
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
BUG_ON(!alloc_cpumask_var(&top_cpuset.prefer_cpus, GFP_KERNEL));
#endif
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
cpumask_setall(top_cpuset.effective_cpus);
nodes_setall(top_cpuset.effective_mems);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_clear(top_cpuset.prefer_cpus);
#endif
fmeter_init(&top_cpuset.fmeter);
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
BUG_ON(!alloc_cpumask_var(&prefer_cpus_attach, GFP_KERNEL));
#endif
return 0;
}
@@ -2992,6 +3132,9 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
struct cpumask *new_cpus, nodemask_t *new_mems,
bool cpus_updated, bool mems_updated)
{
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_t prefer_cpus;
#endif
bool is_empty;
spin_lock_irq(&callback_lock);
@@ -3010,6 +3153,13 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
if (mems_updated && !nodes_empty(cs->mems_allowed))
update_tasks_nodemask(cs);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!cpumask_subset(cs->prefer_cpus, cs->cpus_allowed)) {
cpumask_and(&prefer_cpus, cs->prefer_cpus, cs->cpus_allowed);
cpumask_copy(cs->prefer_cpus, &prefer_cpus);
update_tasks_prefer_cpumask(cs);
}
#endif
is_empty = cpumask_empty(cs->cpus_allowed) ||
nodes_empty(cs->mems_allowed);
......
@@ -468,6 +468,9 @@ void free_task(struct task_struct *tsk)
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
sched_prefer_cpus_free(tsk);
#endif
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -929,6 +932,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->seccomp.filter = NULL;
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
tsk->prefer_cpus = NULL;
#endif
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
clear_tsk_need_resched(tsk);
@@ -2047,6 +2054,12 @@ static __latent_entropy struct task_struct *copy_process(
rt_mutex_init_task(p);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
retval = sched_prefer_cpus_fork(p, current->prefer_cpus);
if (retval)
goto bad_fork_free;
#endif
lockdep_assert_irqs_enabled();
#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
......
@@ -9763,6 +9763,101 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
return 0;
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int sched_prefer_cpus_fork(struct task_struct *p, struct cpumask *mask)
{
p->prefer_cpus = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!p->prefer_cpus)
return -ENOMEM;
if (mask)
cpumask_copy(p->prefer_cpus, mask);
else
cpumask_clear(p->prefer_cpus);
return 0;
}
void sched_prefer_cpus_free(struct task_struct *p)
{
kfree(p->prefer_cpus);
}
static void do_set_prefer_cpus(struct task_struct *p,
const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
lockdep_assert_held(&p->pi_lock);
queued = task_on_rq_queued(p);
running = task_current(rq, p);
if (queued) {
/*
* Because __kthread_bind() calls this on blocked tasks without
* holding rq->lock.
*/
lockdep_assert_held(&rq->__lock);
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
}
if (running)
put_prev_task(rq, p);
cpumask_copy(p->prefer_cpus, new_mask);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
if (running)
set_next_task(rq, p);
}
/*
* Change a given task's preferred CPU affinity. The scheduler will prefer to
* migrate the thread to the CPUs in the preferred bitmask.
*
* NOTE: the caller must have a valid reference to the task, the
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
static int __set_prefer_cpus_ptr(struct task_struct *p,
const struct cpumask *new_mask, bool check)
{
struct rq_flags rf;
struct rq *rq;
int ret = 0;
if (unlikely(!p->prefer_cpus))
return -EINVAL;
rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
if (cpumask_equal(p->prefer_cpus, new_mask))
goto out;
if (!cpumask_subset(new_mask, p->cpus_ptr)) {
ret = -EINVAL;
goto out;
}
do_set_prefer_cpus(p, new_mask);
out:
task_rq_unlock(rq, p, &rf);
return ret;
}
int set_prefer_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
if (p->sched_class != &fair_sched_class)
return 0;
return __set_prefer_cpus_ptr(p, new_mask, false);
}
#endif
#ifdef CONFIG_CFS_BANDWIDTH
static int cpu_max_show(struct seq_file *sf, void *v)
{
......