diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94acffa97e28d6176ae6f136c20107b91c33bb85..8456973e45e1984bfcdf0cc0b82bc37de0448091 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6975,8 +6975,10 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
 
 	if (qos_level == -1) {
 		policy = SCHED_IDLE;
+		cfs_bandwidth_usage_inc();
 	} else {
 		policy = SCHED_NORMAL;
+		cfs_bandwidth_usage_dec();
 	}
 
 	tg->qos_level = qos_level;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7258a270ea15e0233b809616e9b30735309f6f76..1ba17639758671588f0fa1e25091731cc061acdf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -99,6 +99,10 @@ int __weak arch_asym_cpu_priority(int cpu)
 }
 #endif
 
+#ifdef CONFIG_QOS_SCHED
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -6869,6 +6873,130 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	set_last_buddy(se);
 }
 
+#ifdef CONFIG_QOS_SCHED
+/* Park an offline group's cfs_rq on this CPU's qos throttled list. */
+static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct sched_entity *se;
+	long task_delta, idle_task_delta, dequeue = 1;
+
+	se = cfs_rq->tg->se[cpu_of(rq)];
+
+	/* freeze hierarchy runnable averages while throttled */
+	rcu_read_lock();
+	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+	rcu_read_unlock();
+
+	task_delta = cfs_rq->h_nr_running;
+	idle_task_delta = cfs_rq->idle_h_nr_running;
+	for_each_sched_entity(se) {
+		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+		/* throttled entity or throttle-on-deactivate */
+		if (!se->on_rq)
+			break;
+
+		if (dequeue)
+			dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
+		qcfs_rq->h_nr_running -= task_delta;
+		qcfs_rq->idle_h_nr_running -= idle_task_delta;
+
+		if (qcfs_rq->load.weight)
+			dequeue = 0;
+	}
+
+	if (!se)
+		sub_nr_running(rq, task_delta);
+
+	cfs_rq->throttled = 1;
+	cfs_rq->throttled_clock = rq_clock(rq);
+
+	list_add(&cfs_rq->throttled_list,
+		 &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
+}
+
+static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct sched_entity *se;
+	int enqueue = 1;
+	long task_delta, idle_task_delta;
+
+	se = cfs_rq->tg->se[cpu_of(rq)];
+
+	cfs_rq->throttled = 0;
+
+	update_rq_clock(rq);
+
+	cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+	list_del_init(&cfs_rq->throttled_list);
+
+	/* update hierarchical throttle state */
+	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+
+	if (!cfs_rq->load.weight)
+		return;
+
+	task_delta = cfs_rq->h_nr_running;
+	idle_task_delta = cfs_rq->idle_h_nr_running;
+	for_each_sched_entity(se) {
+		if (se->on_rq)
+			enqueue = 0;
+
+		cfs_rq = cfs_rq_of(se);
+		if (enqueue)
+			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+		cfs_rq->h_nr_running += task_delta;
+		cfs_rq->idle_h_nr_running += idle_task_delta;
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+	}
+
+	assert_list_leaf_cfs_rq(rq);
+
+	if (!se)
+		add_nr_running(rq, task_delta);
+
+	/* Determine whether we need to wake up potentially idle CPU: */
+	if (rq->curr == rq->idle && rq->cfs.nr_running)
+		resched_curr(rq);
+}
+
+/* Requeue every qos-throttled group on @cpu; returns how many were woken. */
+static int unthrottle_qos_cfs_rqs(int cpu)
+{
+	struct cfs_rq *cfs_rq, *tmp_rq;
+	int res = 0;
+
+	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(qos_throttled_cfs_rq, cpu),
+				 throttled_list) {
+		if (cfs_rq_throttled(cfs_rq)) {
+			unthrottle_qos_cfs_rq(cfs_rq);
+			res++;
+		}
+	}
+
+	return res;
+}
+
+/* Throttle @cfs_rq if it belongs to an offline group on a non-idle CPU. */
+static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	if (!cfs_bandwidth_used())
+		return false;
+
+	if (cfs_rq && cfs_rq->tg->qos_level < 0 &&
+	    !sched_idle_cpu(cpu_of(rq_of(cfs_rq)))) {
+		throttle_qos_cfs_rq(cfs_rq);
+		return true;
+	}
+
+	return false;
+}
+#endif
+
 static struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -6927,6 +7055,13 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 		se = pick_next_entity(cfs_rq, curr);
 		cfs_rq = group_cfs_rq(se);
+#ifdef CONFIG_QOS_SCHED
+		if (check_qos_cfs_rq(cfs_rq)) {
+			/* Group was just throttled: repick from the root. */
+			cfs_rq = &rq->cfs;
+			BUG_ON(cfs_rq->nr_running == 0);
+		}
+#endif
 	} while (cfs_rq);
 
 	p = task_of(se);
@@ -7016,6 +7151,11 @@ done: __maybe_unused;
 
 	if (new_tasks > 0)
 		goto again;
 
+#ifdef CONFIG_QOS_SCHED
+	if (unthrottle_qos_cfs_rqs(cpu_of(rq)))
+		goto again;
+#endif
+
 	return NULL;
 }
 
@@ -10683,6 +10823,13 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
 
 __init void init_sched_fair_class(void)
 {
+#ifdef CONFIG_QOS_SCHED
+	int i;
+
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i));
+#endif
+
 #ifdef CONFIG_SMP
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
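
A note on the wiring, plus a hedged usage sketch. check_qos_cfs_rq() bails out
early unless cfs_bandwidth_used() is true; that is why cpu_qos_write() bumps the
static key with cfs_bandwidth_usage_inc() when a group goes offline and drops it
again on the way back. From userspace, the knob is a cgroup attribute. The
sketch below is not part of the patch: it assumes the attribute is exposed as
cpu.qos_level under a cgroup-v1 cpu hierarchy mounted at /sys/fs/cgroup/cpu;
the attribute name is implied by cpu_qos_write(), but the cgroup file glue is
not visible in this diff, so treat the path as an assumption.

/*
 * Hedged usage sketch (not part of the patch): mark an existing cpu
 * cgroup as offline by writing -1 to its qos level attribute.  The
 * "cpu.qos_level" name and the cgroup-v1 mount point are assumptions
 * inferred from cpu_qos_write(); adjust for the real interface.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_qos_level(const char *cgroup, int level)
{
	char path[256], buf[16];
	int fd, n;

	snprintf(path, sizeof(path),
		 "/sys/fs/cgroup/cpu/%s/cpu.qos_level", cgroup);
	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return -1;
	}
	n = snprintf(buf, sizeof(buf), "%d", level);
	if (write(fd, buf, n) != n) {
		perror("write");
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Tasks in "offline" now only run when nothing else wants the CPU. */
	return set_qos_level("offline", -1) ? 1 : 0;
}

Writing -1 demotes every task in the group to SCHED_IDLE and arms the throttling
above: the next time pick_next_task_fair() selects the group on a CPU that still
has non-idle work, the whole cfs_rq is dequeued onto that CPU's
qos_throttled_cfs_rq list, and it is only requeued once the CPU would otherwise
go idle.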
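For testing the patch, a second hedged sketch: two spinners pinned to the same
CPU, one in an offline group and one in a normal group. With the patch applied,
the offline spinner should report close to zero iterations while the online one
runs; with qos_level left at 0 in both groups, the counts should be comparable.
The group names ("offline"/"online"), the cgroup-v1 tasks file, and the
pre-created hierarchies are all assumptions, not things this diff establishes.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/wait.h>

/* Move a pid into an existing cpu cgroup (cgroup-v1 path is an assumption). */
static void enter_cgroup(const char *group, pid_t pid)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/fs/cgroup/cpu/%s/tasks", group);
	f = fopen(path, "w");
	if (!f) {
		perror("fopen");
		exit(1);
	}
	fprintf(f, "%d\n", pid);
	fclose(f);
}

static void spinner(const char *group, const char *tag)
{
	cpu_set_t set;
	unsigned long long iters = 0;
	time_t end;

	enter_cgroup(group, getpid());

	/* Both spinners compete for CPU 0. */
	CPU_ZERO(&set);
	CPU_SET(0, &set);
	sched_setaffinity(0, sizeof(set), &set);

	end = time(NULL) + 5;
	while (time(NULL) < end)
		iters++;
	printf("%s: %llu iterations\n", tag, iters);
	exit(0);
}

int main(void)
{
	/* "offline" has cpu.qos_level == -1, "online" is left at 0. */
	if (fork() == 0)
		spinner("offline", "offline spinner");
	if (fork() == 0)
		spinner("online", "online spinner");
	wait(NULL);
	wait(NULL);
	return 0;
}

Run as root (writing to the tasks file needs it). The asymmetry comes from
check_qos_cfs_rq(): whenever the offline group is picked while the online
spinner is runnable, the offline cfs_rq is throttled, and unthrottle_qos_cfs_rqs()
only gives it back once pick_next_task_fair() finds nothing else to run.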