Commit 243865da authored by Hui Tang, committed by Yongqiang Liu

cpuset: Introduce new interface for scheduler dynamic affinity

hulk inclusion
category: feature
bugzilla: 187173, https://gitee.com/openeuler/kernel/issues/I5G4IH
CVE: NA

--------------------------------

Add a 'prefer_cpus' sysfs interface and the related kernel support to the cgroup cpuset controller.
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
Parent 5cabb5b5
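A minimal usage sketch (not from the patch itself): with CONFIG_QOS_SCHED_DYNAMIC_AFFINITY enabled, the "preferred_cpus" cftype added below should surface as a per-cpuset file, presumably cpuset.preferred_cpus under a cgroup v1 cpuset mount; the mount point /sys/fs/cgroup/cpuset and the child cpuset "mygrp" used here are assumptions. A write goes through update_prefer_cpumask(), which returns -EINVAL when the list is not a subset of the cpuset's cpus_allowed and treats an empty write as disabling dynamic affinity for the cpuset's tasks:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Hypothetical paths: a cgroup v1 cpuset mount and a child cpuset "mygrp". */
	const char *path = "/sys/fs/cgroup/cpuset/mygrp/cpuset.preferred_cpus";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}

	/*
	 * Prefer CPUs 0-3. The kernel side (update_prefer_cpumask) returns
	 * -EINVAL if the list is not a subset of the cpuset's cpus_allowed;
	 * writing an empty string clears the mask and disables dynamic
	 * affinity for the cpuset's tasks.
	 */
	if (fprintf(f, "0-3\n") < 0 || fclose(f) != 0) {
		perror("write preferred_cpus");
		return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}

Reading the file back is served by cpuset_common_seq_show() and prints the current prefer_cpus mask in the same CPU-list format.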
@@ -1247,7 +1247,16 @@ struct task_struct {
#else
KABI_RESERVE(5)
#endif
#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY)
cpumask_t *prefer_cpus;
#else
KABI_RESERVE(6)
#endif
#else
KABI_RESERVE(6)
#endif
KABI_RESERVE(7)
KABI_RESERVE(8)
@@ -1964,4 +1973,12 @@ static inline int sched_qos_cpu_overload(void)
}
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int dynamic_affinity_enabled(void);
int set_prefer_cpus_ptr(struct task_struct *p,
const struct cpumask *new_mask);
int sched_prefer_cpus_fork(struct task_struct *p, struct task_struct *orig);
void sched_prefer_cpus_free(struct task_struct *p);
#endif
#endif
@@ -180,6 +180,9 @@ struct task_struct init_task
#ifdef CONFIG_SECURITY
.security = NULL,
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
.prefer_cpus = NULL,
#endif
#ifdef CONFIG_PID_RESERVE
.fork_pid_union = {
.fork_pid = 0,
......
@@ -104,6 +104,9 @@ struct cpuset {
/* user-configured CPUs and Memory Nodes allow to tasks */
cpumask_var_t cpus_allowed;
nodemask_t mems_allowed;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_var_t prefer_cpus;
#endif
/* effective CPUs and Memory Nodes allow to tasks */
cpumask_var_t effective_cpus;
@@ -436,11 +439,22 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
goto free_cs;
if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
goto free_cpus;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!alloc_cpumask_var(&trial->prefer_cpus, GFP_KERNEL))
goto free_prefer_cpus;
#endif
cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
cpumask_copy(trial->effective_cpus, cs->effective_cpus);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_copy(trial->prefer_cpus, cs->prefer_cpus);
#endif
return trial;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_prefer_cpus:
free_cpumask_var(trial->effective_cpus);
#endif
free_cpus:
free_cpumask_var(trial->cpus_allowed);
free_cs:
@@ -456,6 +470,9 @@ static void free_trial_cpuset(struct cpuset *trial)
{
free_cpumask_var(trial->effective_cpus);
free_cpumask_var(trial->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_cpumask_var(trial->prefer_cpus);
#endif
kfree(trial);
}
@@ -487,6 +504,11 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
rcu_read_lock();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
ret = -EINVAL;
if (!cpumask_subset(cur->prefer_cpus, trial->cpus_allowed))
goto out;
#endif
/* Each of our child cpusets must be a subset of us */
ret = -EBUSY;
cpuset_for_each_child(c, css, cur)
@@ -551,6 +573,66 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
return ret;
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
static cpumask_var_t prefer_cpus_attach;
static void update_tasks_prefer_cpumask(struct cpuset *cs)
{
struct css_task_iter it;
struct task_struct *task;
css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it)))
set_prefer_cpus_ptr(task, cs->prefer_cpus);
css_task_iter_end(&it);
}
/*
* update_prefer_cpumask - update the prefer_cpus mask of a cpuset and
* all tasks in it
* @cs: the cpuset to consider
* @trialcs: trial cpuset
* @buf: buffer of cpu numbers written to this cpuset
*/
static int update_prefer_cpumask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
int retval;
if (cs == &top_cpuset)
return -EACCES;
/*
* An empty prefer_cpus is OK; it means the cpuset's tasks have the
* dynamic affinity feature disabled.
* Since cpulist_parse() fails on an empty mask, we special case
* that parsing.
*/
if (!*buf) {
cpumask_clear(trialcs->prefer_cpus);
} else {
retval = cpulist_parse(buf, trialcs->prefer_cpus);
if (retval < 0)
return retval;
}
/* Nothing to do if the cpus didn't change */
if (cpumask_equal(cs->prefer_cpus, trialcs->prefer_cpus))
return 0;
if (!cpumask_subset(trialcs->prefer_cpus, cs->cpus_allowed))
return -EINVAL;
update_tasks_prefer_cpumask(trialcs);
spin_lock_irq(&callback_lock);
cpumask_copy(cs->prefer_cpus, trialcs->prefer_cpus);
spin_unlock_irq(&callback_lock);
return 0;
}
#endif
#ifdef CONFIG_SMP
/*
* Helper routine for generate_sched_domains().
@@ -1543,6 +1625,10 @@ static void cpuset_attach(struct cgroup_taskset *tset)
else
guarantee_online_cpus(cs, cpus_attach);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_copy(prefer_cpus_attach, cs->prefer_cpus);
#endif
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cgroup_taskset_for_each(task, css, tset) {
@@ -1551,6 +1637,9 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* fail. TODO: have a better way to handle failure here
*/
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
set_prefer_cpus_ptr(task, prefer_cpus_attach);
#endif
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
@@ -1610,6 +1699,9 @@ typedef enum {
FILE_MEMORY_PRESSURE,
FILE_SPREAD_PAGE,
FILE_SPREAD_SLAB,
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
FILE_DYNAMIC_CPULIST,
#endif
} cpuset_filetype_t;
static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -1735,6 +1827,11 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
case FILE_MEMLIST:
retval = update_nodemask(cs, trialcs, buf);
break;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
case FILE_DYNAMIC_CPULIST:
retval = update_prefer_cpumask(cs, trialcs, buf);
break;
#endif
default:
retval = -EINVAL;
break;
@@ -1778,6 +1875,11 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
case FILE_EFFECTIVE_MEMLIST:
seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
break;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
case FILE_DYNAMIC_CPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->prefer_cpus));
break;
#endif
default:
ret = -EINVAL;
}
@@ -1935,7 +2037,15 @@ static struct cftype files[] = {
.write_u64 = cpuset_write_u64,
.private = FILE_MEMORY_PRESSURE_ENABLED,
},
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
{
.name = "preferred_cpus",
.seq_show = cpuset_common_seq_show,
.write = cpuset_write_resmask,
.max_write_len = (100U + 6 * NR_CPUS),
.private = FILE_DYNAMIC_CPULIST,
},
#endif
{ } /* terminate */
};
@@ -1959,17 +2069,28 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
goto free_cs;
if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
goto free_cpus;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!alloc_cpumask_var(&cs->prefer_cpus, GFP_KERNEL))
goto free_effective_cpus;
#endif
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
cpumask_clear(cs->effective_cpus);
nodes_clear(cs->effective_mems);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_clear(cs->prefer_cpus);
#endif
fmeter_init(&cs->fmeter);
cs->relax_domain_level = -1;
return &cs->css;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_effective_cpus:
free_cpumask_var(cs->effective_cpus);
#endif
free_cpus:
free_cpumask_var(cs->cpus_allowed);
free_cs:
@@ -2034,6 +2155,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cs->effective_mems = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_copy(cs->prefer_cpus, parent->prefer_cpus);
#endif
spin_unlock_irq(&callback_lock);
out_unlock:
mutex_unlock(&cpuset_mutex);
@@ -2065,6 +2189,9 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
{
struct cpuset *cs = css_cs(css);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_cpumask_var(cs->prefer_cpus);
#endif
free_cpumask_var(cs->effective_cpus);
free_cpumask_var(cs->cpus_allowed);
kfree(cs);
@@ -2099,6 +2226,9 @@ static void cpuset_fork(struct task_struct *task)
return;
set_cpus_allowed_ptr(task, &current->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
set_prefer_cpus_ptr(task, current->prefer_cpus);
#endif
task->mems_allowed = current->mems_allowed;
}
@@ -2129,11 +2259,17 @@ int __init cpuset_init(void)
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
BUG_ON(!alloc_cpumask_var(&top_cpuset.prefer_cpus, GFP_KERNEL));
#endif
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
cpumask_setall(top_cpuset.effective_cpus);
nodes_setall(top_cpuset.effective_mems);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_clear(top_cpuset.prefer_cpus);
#endif
fmeter_init(&top_cpuset.fmeter);
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
@@ -2144,6 +2280,9 @@ int __init cpuset_init(void)
return err;
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
BUG_ON(!alloc_cpumask_var(&prefer_cpus_attach, GFP_KERNEL));
#endif
return 0;
}
@@ -2180,6 +2319,9 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
struct cpumask *new_cpus, nodemask_t *new_mems,
bool cpus_updated, bool mems_updated)
{
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_t prefer_cpus;
#endif
bool is_empty;
spin_lock_irq(&callback_lock);
@@ -2198,6 +2340,13 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
if (mems_updated && !nodes_empty(cs->mems_allowed))
update_tasks_nodemask(cs);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!cpumask_subset(cs->prefer_cpus, cs->cpus_allowed)) {
cpumask_and(&prefer_cpus, cs->prefer_cpus, cs->cpus_allowed);
cpumask_copy(cs->prefer_cpus, &prefer_cpus);
update_tasks_prefer_cpumask(cs);
}
#endif
is_empty = cpumask_empty(cs->cpus_allowed) ||
nodes_empty(cs->mems_allowed);
......
@@ -459,6 +459,9 @@ void free_task(struct task_struct *tsk)
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
sched_prefer_cpus_free(tsk);
#endif
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -888,6 +891,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->seccomp.filter = NULL;
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
tsk->prefer_cpus = NULL;
#endif
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
clear_tsk_need_resched(tsk);
@@ -1862,6 +1869,12 @@ static __latent_entropy struct task_struct *copy_process(
if (retval < 0)
goto bad_fork_free;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
retval = sched_prefer_cpus_fork(p, current);
if (retval)
goto bad_fork_free;
#endif
/*
* If multiple threads are within copy_process(), then this check
* triggers too late. This doesn't hurt, the check is only there
......
@@ -7191,6 +7191,101 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
return 0;
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int sched_prefer_cpus_fork(struct task_struct *p, struct task_struct *orig)
{
p->prefer_cpus = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!p->prefer_cpus)
return -ENOMEM;
if (orig->prefer_cpus)
cpumask_copy(p->prefer_cpus, orig->prefer_cpus);
else
cpumask_clear(p->prefer_cpus);
return 0;
}
void sched_prefer_cpus_free(struct task_struct *p)
{
kfree(p->prefer_cpus);
}
static void do_set_prefer_cpus(struct task_struct *p,
const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
lockdep_assert_held(&p->pi_lock);
queued = task_on_rq_queued(p);
running = task_current(rq, p);
if (queued) {
/*
* Because __kthread_bind() calls this on blocked tasks without
* holding rq->lock.
*/
lockdep_assert_held(&rq->lock);
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
}
if (running)
put_prev_task(rq, p);
cpumask_copy(p->prefer_cpus, new_mask);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
if (running)
set_curr_task(rq, p);
}
/*
* Change a given task's preferred CPU affinity. Migrating the thread to the
* preferred cpus is prioritized according to the preferred bitmask.
*
* NOTE: the caller must have a valid reference to the task, the
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
static int __set_prefer_cpus_ptr(struct task_struct *p,
const struct cpumask *new_mask, bool check)
{
struct rq_flags rf;
struct rq *rq;
int ret = 0;
if (unlikely(!p->prefer_cpus))
return -EINVAL;
rq = task_rq_lock(p, &rf);
update_rq_clock(rq);
if (cpumask_equal(p->prefer_cpus, new_mask))
goto out;
if (!cpumask_subset(new_mask, &p->cpus_allowed)) {
ret = -EINVAL;
goto out;
}
do_set_prefer_cpus(p, new_mask);
out:
task_rq_unlock(rq, p, &rf);
return ret;
}
int set_prefer_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
if (p->sched_class != &fair_sched_class)
return 0;
return __set_prefer_cpus_ptr(p, new_mask, false);
}
#endif
#ifdef CONFIG_CFS_BANDWIDTH
static int cpu_max_show(struct seq_file *sf, void *v)
{
......