diff --git a/fs/proc/array.c b/fs/proc/array.c index 9eb99a43f849a045d60b6f196d22e665a0e73e20..fd56e15b3463f8344c19f18860ade81899a8e4a2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -386,6 +386,16 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) cpumask_pr_args(&task->cpus_allowed)); } +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +static void task_cpus_preferred(struct seq_file *m, struct task_struct *task) +{ + seq_printf(m, "Cpus_preferred:\t%*pb\n", + cpumask_pr_args(task->prefer_cpus)); + seq_printf(m, "Cpus_preferred_list:\t%*pbl\n", + cpumask_pr_args(task->prefer_cpus)); +} +#endif + static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) { seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state); @@ -414,6 +424,9 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); task_context_switch_counts(m, task); +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + task_cpus_preferred(m, task); +#endif return 0; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 928186f161000ada4d15289a8d9f0a8b8e89beb3..87cc3cbff9342ae35022111db1fb947f04de18c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2000,4 +2000,17 @@ int sched_prefer_cpus_fork(struct task_struct *p, struct task_struct *orig); void sched_prefer_cpus_free(struct task_struct *p); #endif +#ifdef CONFIG_QOS_SCHED_SMART_GRID +extern struct static_key __smart_grid_used; +static inline bool smart_grid_used(void) +{ + return static_key_false(&__smart_grid_used); +} +#else +static inline bool smart_grid_used(void) +{ + return false; +} +#endif + #endif diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 04eb5b127867b096a3b532bded03da80f6ee7d52..ad472760e97d77d0777d32be6948b54c2e67c5a0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -36,6 +36,10 @@ extern unsigned int sysctl_sched_child_runs_first; extern int sysctl_sched_util_low_pct; #endif +#ifdef CONFIG_QOS_SCHED_SMART_GRID +extern int sysctl_affinity_adjust_delay_ms; +#endif + enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, SCHED_TUNABLESCALING_LOG, diff --git a/init/Kconfig b/init/Kconfig index a2a733080fe1114861bd99fcca272dacfc4c23c3..ef1768d36a050a5cb1b83ac2efc2c30bce38289c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -834,6 +834,19 @@ config QOS_SCHED_DYNAMIC_AFFINITY of taskgroup is below threshold setted, otherwise make taskgroup to use cpus allowed. +config QOS_SCHED_SMART_GRID + bool "qos smart grid scheduler" + depends on FAIR_GROUP_SCHED && QOS_SCHED_DYNAMIC_AFFINITY + default n + help + This feature is used for power consumption tuning in server scenario. + This can be divided into the following aspects: + 1. User interface, manage user needs. + 2. Collect tasks' features to ensure key tasks' QOS. + 3. Weaken the influence the impact of CPU frequency and cpuidle + adjustment on tasks. + 4. Docking EAS (Energy Aware Scheduling) model. + config CGROUP_PIDS bool "PIDs controller" help diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 168e98c6d51a739f7b16dacf2c2cda29eca16847..a60f58afb28c19daf1c07cb19150cee4c4d2fc43 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5842,6 +5842,7 @@ int sched_cpu_activate(unsigned int cpu) static_branch_inc_cpuslocked(&sched_smt_present); #endif set_cpu_active(cpu, true); + tg_update_affinity_domains(cpu, 1); if (sched_smp_initialized) { sched_domains_numa_masks_set(cpu); @@ -5900,6 +5901,7 @@ int sched_cpu_deactivate(unsigned int cpu) return ret; } sched_domains_numa_masks_clear(cpu); + tg_update_affinity_domains(cpu, 0); return 0; } @@ -5970,6 +5972,8 @@ void __init sched_init_smp(void) init_sched_dl_class(); sched_smp_initialized = true; + + init_auto_affinity(&root_task_group); } static int __init migration_init(void) @@ -6530,6 +6534,9 @@ void sched_move_task(struct task_struct *tsk) DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq_flags rf; struct rq *rq; +#ifdef CONFIG_QOS_SCHED_SMART_GRID + struct affinity_domain *ad; +#endif rq = task_rq_lock(tsk, &rf); update_rq_clock(rq); @@ -6550,6 +6557,14 @@ void sched_move_task(struct task_struct *tsk) set_curr_task(rq, tsk); task_rq_unlock(rq, tsk, &rf); + +#ifdef CONFIG_QOS_SCHED_SMART_GRID + if (smart_grid_used()) { + ad = &task_group(tsk)->auto_affinity->ad; + set_prefer_cpus_ptr(tsk, ad->domains[ad->curr_level]); + } +#endif + } static inline struct task_group *css_tg(struct cgroup_subsys_state *css) @@ -6969,6 +6984,117 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, } #endif /* CONFIG_RT_GROUP_SCHED */ +#ifdef CONFIG_QOS_SCHED_SMART_GRID +int tg_set_dynamic_affinity_mode(struct task_group *tg, u64 mode) +{ + struct auto_affinity *auto_affi = tg->auto_affinity; + int ret = 0; + + raw_spin_lock_irq(&auto_affi->lock); + + /* auto mode*/ + if (mode == 1) { + start_auto_affinity(auto_affi); + } else if (mode == 0) { + stop_auto_affinity(auto_affi); + } else { + raw_spin_unlock_irq(&auto_affi->lock); + return -EINVAL; + } + + auto_affi->mode = mode; + raw_spin_unlock_irq(&auto_affi->lock); + + return ret; +} + +static u64 cpu_affinity_mode_read_u64(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct task_group *tg = css_tg(css); + + return tg->auto_affinity->mode; +} + +static int cpu_affinity_mode_write_u64(struct cgroup_subsys_state *css, + struct cftype *cftype, u64 mode) +{ + return tg_set_dynamic_affinity_mode(css_tg(css), mode); +} + +int tg_set_affinity_period(struct task_group *tg, u64 period_ms) +{ + if (period_ms > U64_MAX / NSEC_PER_MSEC) + return -EINVAL; + + raw_spin_lock_irq(&tg->auto_affinity->lock); + tg->auto_affinity->period = ms_to_ktime(period_ms); + raw_spin_unlock_irq(&tg->auto_affinity->lock); + return 0; +} + +u64 tg_get_affinity_period(struct task_group *tg) +{ + return ktime_to_ms(tg->auto_affinity->period); +} + +static int cpu_affinity_period_write_uint(struct cgroup_subsys_state *css, + struct cftype *cftype, u64 period) +{ + return tg_set_affinity_period(css_tg(css), period); +} + +static u64 cpu_affinity_period_read_uint(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return tg_get_affinity_period(css_tg(css)); +} + +static int cpu_affinity_domain_mask_write_u64(struct cgroup_subsys_state *css, + struct cftype *cftype, + u64 mask) +{ + struct task_group *tg = css_tg(css); + struct affinity_domain *ad = &tg->auto_affinity->ad; + u16 full = (1 << ad->dcount) - 1; + + if (mask > full) + return -EINVAL; + + raw_spin_lock_irq(&tg->auto_affinity->lock); + ad->domain_mask = mask; + raw_spin_unlock_irq(&tg->auto_affinity->lock); + return 0; +} + +static u64 cpu_affinity_domain_mask_read_u64(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct task_group *tg = css_tg(css); + + return tg->auto_affinity->ad.domain_mask; +} + +static int cpu_affinity_stat_show(struct seq_file *sf, void *v) +{ + struct task_group *tg = css_tg(seq_css(sf)); + struct auto_affinity *auto_affi = tg->auto_affinity; + struct affinity_domain *ad = &auto_affi->ad; + int i; + + seq_printf(sf, "period_active %d\n", auto_affi->period_active); + seq_printf(sf, "dcount %d\n", ad->dcount); + seq_printf(sf, "domain_mask 0x%x\n", ad->domain_mask); + seq_printf(sf, "curr_level %d\n", ad->curr_level); + for (i = 0; i < ad->dcount; i++) + seq_printf(sf, "sd_level %d, cpu list %*pbl, stay_cnt %llu\n", + i, cpumask_pr_args(ad->domains[i]), + schedstat_val(ad->stay_cnt[i])); + + return 0; +} +#endif /* CONFIG_QOS_SCHED_SMART_GRID */ + #ifdef CONFIG_QOS_SCHED static int tg_change_scheduler(struct task_group *tg, void *data) { @@ -7073,6 +7199,27 @@ static struct cftype cpu_legacy_files[] = { .read_s64 = cpu_qos_read, .write_s64 = cpu_qos_write, }, +#endif +#ifdef CONFIG_QOS_SCHED_SMART_GRID + { + .name = "dynamic_affinity_mode", + .read_u64 = cpu_affinity_mode_read_u64, + .write_u64 = cpu_affinity_mode_write_u64, + }, + { + .name = "affinity_period_ms", + .read_u64 = cpu_affinity_period_read_uint, + .write_u64 = cpu_affinity_period_write_uint, + }, + { + .name = "affinity_domain_mask", + .read_u64 = cpu_affinity_domain_mask_read_u64, + .write_u64 = cpu_affinity_domain_mask_write_u64, + }, + { + .name = "affinity_stat", + .seq_show = cpu_affinity_stat_show, + }, #endif { } /* Terminate */ }; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 77a49eae2cddc63ea9ed8f32d7935fe6412e6953..5a1d62fd4c0a5afa4e47ecceb3cf069b86b9618c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5281,6 +5281,417 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} #endif /* CONFIG_CFS_BANDWIDTH */ +#ifdef CONFIG_QOS_SCHED_SMART_GRID +#define AUTO_AFFINITY_DEFAULT_PERIOD_MS 2000 +#define IS_DOMAIN_SET(level, mask) ((1 << (level)) & (mask)) + +static inline unsigned long cpu_util(int cpu); +static unsigned long target_load(int cpu, int type); +static unsigned long capacity_of(int cpu); +static int sched_idle_cpu(int cpu); +static unsigned long weighted_cpuload(struct rq *rq); + +int sysctl_affinity_adjust_delay_ms = 5000; + +struct static_key __smart_grid_used; + +static void smart_grid_usage_inc(void) +{ + static_key_slow_inc_cpuslocked(&__smart_grid_used); +} + +static void smart_grid_usage_dec(void) +{ + static_key_slow_dec_cpuslocked(&__smart_grid_used); +} + +static void tg_update_task_prefer_cpus(struct task_group *tg) +{ + struct affinity_domain *ad = &tg->auto_affinity->ad; + struct task_struct *task; + struct css_task_iter it; + + css_task_iter_start(&tg->css, 0, &it); + while ((task = css_task_iter_next(&it))) { + if (tg == &root_task_group && !task->mm) + continue; + + set_prefer_cpus_ptr(task, ad->domains[ad->curr_level]); + } + css_task_iter_end(&it); +} + +static void affinity_domain_up(struct task_group *tg) +{ + struct affinity_domain *ad = &tg->auto_affinity->ad; + u16 level = ad->curr_level; + + if (ad->curr_level >= ad->dcount - 1) + return; + + while (level < ad->dcount) { + if (IS_DOMAIN_SET(level + 1, ad->domain_mask)) { + ad->curr_level = level + 1; + break; + } + level++; + } + + if (level == ad->dcount) + return; + + tg_update_task_prefer_cpus(tg); +} + +static void affinity_domain_down(struct task_group *tg) +{ + struct affinity_domain *ad = &tg->auto_affinity->ad; + u16 level = ad->curr_level; + + if (ad->curr_level <= 0) + return; + + while (level > 0) { + if (IS_DOMAIN_SET(level - 1, ad->domain_mask)) { + ad->curr_level = level - 1; + break; + } + level--; + } + + if (!level) + return; + + tg_update_task_prefer_cpus(tg); +} + +static enum hrtimer_restart sched_auto_affi_period_timer(struct hrtimer *timer) +{ + struct auto_affinity *auto_affi = + container_of(timer, struct auto_affinity, period_timer); + struct task_group *tg = auto_affi->tg; + struct affinity_domain *ad = &auto_affi->ad; + struct cpumask *span = ad->domains[ad->curr_level]; + unsigned long util_avg_sum = 0; + unsigned long tg_capacity = 0; + unsigned long flags; + int cpu; + + for_each_cpu(cpu, span) { + util_avg_sum += cpu_util(cpu); + tg_capacity += capacity_of(cpu); + } + + if (unlikely(!tg_capacity)) + return HRTIMER_RESTART; + + raw_spin_lock_irqsave(&auto_affi->lock, flags); + if (util_avg_sum * 100 > tg_capacity * sysctl_sched_util_low_pct) { + affinity_domain_up(tg); + } else if (util_avg_sum * 100 < tg_capacity * + sysctl_sched_util_low_pct / 2) { + affinity_domain_down(tg); + } + + schedstat_inc(ad->stay_cnt[ad->curr_level]); + hrtimer_forward_now(timer, auto_affi->period); + raw_spin_unlock_irqrestore(&auto_affi->lock, flags); + return HRTIMER_RESTART; +} + +static int tg_update_affinity_domain_down(struct task_group *tg, void *data) +{ + struct auto_affinity *auto_affi = tg->auto_affinity; + struct affinity_domain *ad; + int *cpu_state = data; + unsigned long flags; + int i; + + if (!auto_affi) + return 0; + + ad = &tg->auto_affinity->ad; + raw_spin_lock_irqsave(&auto_affi->lock, flags); + + for (i = 0; i < ad->dcount; i++) { + if (!cpumask_test_cpu(cpu_state[0], ad->domains_orig[i])) + continue; + + /* online */ + if (cpu_state[1]) + cpumask_set_cpu(cpu_state[0], ad->domains[i]); + else + cpumask_clear_cpu(cpu_state[0], ad->domains[i]); + } + raw_spin_unlock_irqrestore(&auto_affi->lock, flags); + + if (!smart_grid_used()) + return 0; + + if (auto_affi->mode) + tg_update_task_prefer_cpus(tg); + return 0; +} + +void tg_update_affinity_domains(int cpu, int online) +{ + int cpu_state[2]; + + cpu_state[0] = cpu; + cpu_state[1] = online; + + rcu_read_lock(); + walk_tg_tree(tg_update_affinity_domain_down, tg_nop, cpu_state); + rcu_read_unlock(); +} + +void start_auto_affinity(struct auto_affinity *auto_affi) +{ + struct task_group *tg = auto_affi->tg; + ktime_t delay_ms; + + if (auto_affi->period_active == 1) + return; + + tg_update_task_prefer_cpus(tg); + + auto_affi->period_active = 1; + delay_ms = ms_to_ktime(sysctl_affinity_adjust_delay_ms); + hrtimer_forward_now(&auto_affi->period_timer, delay_ms); + hrtimer_start_expires(&auto_affi->period_timer, + HRTIMER_MODE_ABS_PINNED); + smart_grid_usage_inc(); +} + +void stop_auto_affinity(struct auto_affinity *auto_affi) +{ + struct task_group *tg = auto_affi->tg; + struct affinity_domain *ad = &auto_affi->ad; + + if (auto_affi->period_active == 0) + return; + + hrtimer_cancel(&auto_affi->period_timer); + auto_affi->period_active = 0; + ad->curr_level = ad->dcount > 0 ? ad->dcount - 1 : 0; + + tg_update_task_prefer_cpus(tg); + smart_grid_usage_dec(); +} + +static struct sched_group *sd_find_idlest_group(struct sched_domain *sd) +{ + struct sched_group *idlest = NULL, *group = sd->groups; + unsigned long min_runnable_load = ULONG_MAX; + unsigned long min_avg_load = ULONG_MAX; + int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; + unsigned long imbalance = scale_load_down(NICE_0_LOAD) * + (sd->imbalance_pct-100) / 100; + + do { + unsigned long load, avg_load, runnable_load; + int i; + + avg_load = 0; + runnable_load = 0; + + for_each_cpu(i, sched_group_span(group)) { + load = target_load(i, 0); + runnable_load += load; + avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); + } + + avg_load = (avg_load * SCHED_CAPACITY_SCALE) / + group->sgc->capacity; + runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) / + group->sgc->capacity; + + if (min_runnable_load > (runnable_load + imbalance)) { + min_runnable_load = runnable_load; + min_avg_load = avg_load; + idlest = group; + } else if ((runnable_load < (min_runnable_load + imbalance)) && + (100*min_avg_load > imbalance_scale*avg_load)) { + min_avg_load = avg_load; + idlest = group; + } + } while (group = group->next, group != sd->groups); + + return idlest ? idlest : group; +} + +static int group_find_idlest_cpu(struct sched_group *group) +{ + int least_loaded_cpu = cpumask_first(sched_group_span(group)); + unsigned long load, min_load = ULONG_MAX; + unsigned int min_exit_latency = UINT_MAX; + u64 latest_idle_timestamp = 0; + int shallowest_idle_cpu = -1; + int i; + + if (group->group_weight == 1) + return least_loaded_cpu; + + for_each_cpu(i, sched_group_span(group)) { + if (sched_idle_cpu(i)) + return i; + + if (available_idle_cpu(i)) { + struct rq *rq = cpu_rq(i); + struct cpuidle_state *idle = idle_get_state(rq); + + if (idle && idle->exit_latency < min_exit_latency) { + min_exit_latency = idle->exit_latency; + latest_idle_timestamp = rq->idle_stamp; + shallowest_idle_cpu = i; + } else if ((!idle || + idle->exit_latency == min_exit_latency) && + rq->idle_stamp > latest_idle_timestamp) { + latest_idle_timestamp = rq->idle_stamp; + shallowest_idle_cpu = i; + } + } else if (shallowest_idle_cpu == -1) { + load = weighted_cpuload(cpu_rq(i)); + if (load < min_load) { + min_load = load; + least_loaded_cpu = i; + } + } + } + + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : + least_loaded_cpu; +} + +void free_affinity_domains(struct affinity_domain *ad) +{ + int i; + + for (i = 0; i < ad->dcount; i++) { + kfree(ad->domains[i]); + ad->domains[i] = NULL; + } + ad->dcount = 0; +} + +static int init_affinity_domains_orig(struct affinity_domain *ad) +{ + int i, j; + + for (i = 0; i < ad->dcount; i++) { + ad->domains_orig[i] = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!ad->domains_orig[i]) + goto err; + + cpumask_copy(ad->domains_orig[i], ad->domains[i]); + } + + return 0; +err: + for (j = 0; j < i; j++) { + kfree(ad->domains_orig[j]); + ad->domains_orig[i] = NULL; + } + return -ENOMEM; +} + +static int init_affinity_domains(struct affinity_domain *ad) +{ + struct sched_domain *sd = NULL, *tmp; + struct sched_group *idlest = NULL; + int ret = -ENOMEM; + int dcount = 0; + int i = 0; + int cpu; + + rcu_read_lock(); + cpu = cpumask_first_and(cpu_active_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); + for_each_domain(cpu, tmp) { + sd = tmp; + dcount++; + } + + if (!sd) { + ad->dcount = 0; + rcu_read_unlock(); + return -EINVAL; + } + rcu_read_unlock(); + + for (i = 0; i < dcount; i++) { + ad->domains[i] = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!ad->domains[i]) + goto err; + } + + rcu_read_lock(); + idlest = sd_find_idlest_group(sd); + cpu = group_find_idlest_cpu(idlest); + i = 0; + for_each_domain(cpu, tmp) { + cpumask_copy(ad->domains[i], sched_domain_span(tmp)); + __schedstat_set(ad->stay_cnt[i], 0); + i++; + } + rcu_read_unlock(); + + ad->dcount = dcount; + ad->curr_level = ad->dcount > 0 ? ad->dcount - 1 : 0; + ad->domain_mask = (1 << ad->dcount) - 1; + + ret = init_affinity_domains_orig(ad); + if (ret) + goto err; + + return 0; +err: + free_affinity_domains(ad); + return ret; +} + +int init_auto_affinity(struct task_group *tg) +{ + struct auto_affinity *auto_affi; + int ret; + + auto_affi = kmalloc(sizeof(*auto_affi), GFP_KERNEL); + if (!auto_affi) + return -ENOMEM; + + raw_spin_lock_init(&auto_affi->lock); + auto_affi->mode = 0; + auto_affi->period_active = 0; + auto_affi->period = ms_to_ktime(AUTO_AFFINITY_DEFAULT_PERIOD_MS); + hrtimer_init(&auto_affi->period_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED); + auto_affi->period_timer.function = sched_auto_affi_period_timer; + + ret = init_affinity_domains(&auto_affi->ad); + if (ret) { + kfree(auto_affi); + return ret; + } + + auto_affi->tg = tg; + tg->auto_affinity = auto_affi; + return 0; +} + +static void destroy_auto_affinity(struct task_group *tg) +{ + struct auto_affinity *auto_affi = tg->auto_affinity; + + hrtimer_cancel(&auto_affi->period_timer); + free_affinity_domains(&auto_affi->ad); + + kfree(tg->auto_affinity); + tg->auto_affinity = NULL; +} +#else +static void destroy_auto_affinity(struct task_group *tg) {} +#endif + /************************************************** * CFS operations on tasks: */ @@ -6733,7 +7144,7 @@ static inline bool prefer_cpus_valid(struct task_struct *p) /* * set_task_select_cpus: select the cpu range for task * @p: the task whose available cpu range will to set - * @idlest_cpu: the cpu which is the idlest in prefer cpus + *uto_affinity_used @idlest_cpu: the cpu which is the idlest in prefer cpus * * If sum of 'util_avg' among 'preferred_cpus' lower than the percentage * 'sysctl_sched_util_low_pct' of 'preferred_cpus' capacity, select @@ -6757,6 +7168,13 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, if (!prefer_cpus_valid(p)) return; + if (smart_grid_used()) { + p->select_cpus = p->prefer_cpus; + if (idlest_cpu) + *idlest_cpu = cpumask_first(p->select_cpus); + return; + } + rcu_read_lock(); tg = task_group(p); for_each_cpu(cpu, p->prefer_cpus) { @@ -10928,6 +11346,7 @@ void free_fair_sched_group(struct task_group *tg) int i; destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); + destroy_auto_affinity(tg); for_each_possible_cpu(i) { #ifdef CONFIG_QOS_SCHED @@ -10948,7 +11367,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { struct sched_entity *se; struct cfs_rq *cfs_rq; - int i; + int i, ret; tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL); if (!tg->cfs_rq) @@ -10960,6 +11379,9 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) tg->shares = NICE_0_LOAD; init_cfs_bandwidth(tg_cfs_bandwidth(tg)); + ret = init_auto_affinity(tg); + if (ret) + goto err; for_each_possible_cpu(i) { cfs_rq = kzalloc_node(sizeof(struct cfs_rq), @@ -10982,6 +11404,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) err_free_rq: kfree(cfs_rq); err: + destroy_auto_affinity(tg); return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ae3068153093881ef4f6743377c2ef2869e22dd6..1d882a2b8d5fee1d870a63e2d5c485d99bc05da5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -361,6 +361,34 @@ struct cfs_bandwidth { #endif }; + +#ifdef CONFIG_QOS_SCHED_SMART_GRID +#define AD_LEVEL_MAX 8 + +struct affinity_domain { + int dcount; + int curr_level; + u32 domain_mask; +#ifdef CONFIG_SCHEDSTATS + u64 stay_cnt[AD_LEVEL_MAX]; +#endif + struct cpumask *domains[AD_LEVEL_MAX]; + struct cpumask *domains_orig[AD_LEVEL_MAX]; +}; +#endif + +struct auto_affinity { +#ifdef CONFIG_QOS_SCHED_SMART_GRID + raw_spinlock_t lock; + u64 mode; + ktime_t period; + struct hrtimer period_timer; + int period_active; + struct affinity_domain ad; + struct task_group *tg; +#endif +}; + /* Task group related information */ struct task_group { struct cgroup_subsys_state css; @@ -407,7 +435,12 @@ struct task_group { #else KABI_RESERVE(1) #endif + +#if defined(CONFIG_QOS_SCHED_SMART_GRID) && !defined(__GENKSYMS__) + struct auto_affinity *auto_affinity; +#else KABI_RESERVE(2) +#endif }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -475,6 +508,21 @@ extern void sched_offline_group(struct task_group *tg); extern void sched_move_task(struct task_struct *tsk); +#ifdef CONFIG_QOS_SCHED_SMART_GRID +extern void start_auto_affinity(struct auto_affinity *auto_affi); +extern void stop_auto_affinity(struct auto_affinity *auto_affi); +extern int init_auto_affinity(struct task_group *tg); +extern void tg_update_affinity_domains(int cpu, int online); + +#else +static inline int init_auto_affinity(struct task_group *tg) +{ + return 0; +} + +static inline void tg_update_affinity_domains(int cpu, int online) {} +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ad62ea156afd98261dc2227f48cdb70d6b40a8d8..592440a2d5db09db324b118a11cdf37522561a2e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1336,6 +1336,15 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, +#endif +#ifdef CONFIG_QOS_SCHED_SMART_GRID + { + .procname = "affinity_adjust_delay_ms", + .data = &sysctl_affinity_adjust_delay_ms, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif { } };