diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b480e1a07ed85198e4d43dde761ae9420734dc95..5913deb26219a363a6e6cc6f2f56ab1ce625a52b 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -196,6 +196,10 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 
 	mem_cgroup_handle_over_high();
 	blkcg_maybe_throttle_current();
+#ifdef CONFIG_QOS_SCHED
+	sched_qos_offline_wait();
+#endif
+
 }
 
 #endif	/* <linux/tracehook.h> */
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 18a29ca01bfe554d8ea9064f2f720a4242b32357..cea3957ebdbcd8f75f98ed819299788f500b48bd 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -160,10 +160,6 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		if (ti_work & _TIF_SIGPENDING)
 			arch_do_signal(regs);
 
-#ifdef CONFIG_QOS_SCHED
-		sched_qos_offline_wait();
-#endif
-
 		if (ti_work & _TIF_NOTIFY_RESUME) {
 			tracehook_notify_resume(regs);
 			rseq_handle_notify_resume(NULL, regs);
@@ -198,8 +194,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 	/* Flush pending rcuog wakeup before the last need_resched() check */
 	rcu_nocb_flush_deferred_wakeup();
 
-	if (unlikely((ti_work & EXIT_TO_USER_MODE_WORK) ||
-		sched_qos_cpu_overload()))
+	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
 		ti_work = exit_to_user_mode_loop(regs, ti_work);
 
 	arch_exit_to_user_mode_prepare(regs, ti_work);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f577d581166b7db39c9b4e6f6e4dc26b609cd768..e85ae008e9da278f0947978b270b6d3be3389d6a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -26,6 +26,7 @@
 #endif
 #ifdef CONFIG_QOS_SCHED
 #include <linux/delay.h>
+#include <linux/tracehook.h>
 #endif
 
 /*
@@ -7178,6 +7179,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 }
 
 #ifdef CONFIG_QOS_SCHED
+
 static void start_qos_hrtimer(int cpu);
 static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
@@ -7342,15 +7344,11 @@ void sched_qos_offline_wait(void)
 		rcu_read_lock();
 		qos_level = task_group(current)->qos_level;
 		rcu_read_unlock();
-		if (qos_level != -1 || signal_pending(current))
+		if (qos_level != -1 || fatal_signal_pending(current))
 			break;
-		msleep_interruptible(sysctl_offline_wait_interval);
-	}
-}
-
-int sched_qos_cpu_overload(void)
-{
-	return __this_cpu_read(qos_cpu_overload);
+		schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
+	}
 }
 
 static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
@@ -7383,6 +7381,23 @@ void init_qos_hrtimer(int cpu)
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	hrtimer->function = qos_overload_timer_handler;
 }
+
+/*
+ * To avoid priority inversion issues, when this cpu is qos_cpu_overload,
+ * we should schedule offline tasks to run so that they can leave kernel
+ * critical sections, and throttle them before returning to user mode.
+ */
+static void qos_schedule_throttle(struct task_struct *p)
+{
+	if (unlikely(current->flags & PF_KTHREAD))
+		return;
+
+	if (unlikely(this_cpu_read(qos_cpu_overload))) {
+		if (task_group(p)->qos_level < 0)
+			set_notify_resume(p);
+	}
+}
+
 #endif
 
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
@@ -7690,9 +7705,14 @@ done: __maybe_unused;
 
 	update_misfit_status(p, rq);
 
+#ifdef CONFIG_QOS_SCHED
+	qos_schedule_throttle(p);
+#endif
+
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
 	qos_smt_expel(this_cpu, p);
 #endif
+
 	return p;
 
 idle: