Commit dad99a57 authored by briansun

futex: introduce the direct-thread-switch mechanism

openEuler inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4L9RU
CVE: NA

Reference: https://lore.kernel.org/lkml/20200722234538.166697-2-posk@posk.io/

-------------------

In some scenarios we need to run several threads together that must hand
the CPU off to each other with minimal overhead and that cooperate like
P/V (semaphore) operations: such a thread repeatedly falls asleep and
wakes another thread up. Every such switch drags the kernel through the
whole scheduling path (select a proper core, wake the task up, enqueue
it, set the rescheduling flag, pick the task at the proper time, dequeue
the previous task and do the context switch). This overhead is not
acceptable for these threads, so we need a mechanism that drops the
unnecessary work and swaps the two threads directly, without affecting
the fairness of CFS tasks.

To achieve this goal, we implement the direct-thread-switch (DTS)
mechanism on top of the futex_swap patch*: the caller switches to the
DTS task directly and lets it run on a shared schedule entity. Basic
checks keep the kernel secure and the scheduler state consistent.
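
As an illustration only (not part of this patch), a userspace caller would
request a DTS switch by OR-ing the new flag into the futex op. The sketch
below assumes FUTEX_SWAP is 13, as in the referenced futex_swap patch, and
uses the FUTEX_FLAGS_DTS_MODE value (512) introduced below; the wrapper
name futex_swap_dts is hypothetical.

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef FUTEX_SWAP
#define FUTEX_SWAP 13			/* from the futex_swap patch (assumed) */
#endif
#define FUTEX_FLAGS_DTS_MODE 512	/* added by this series */

/*
 * Sleep on @this_futex iff *@this_futex == @expected and directly hand the
 * CPU (and the remaining CFS slice) to the waiter of @that_futex.
 */
static long futex_swap_dts(uint32_t *this_futex, uint32_t expected,
			   uint32_t *that_futex)
{
	return syscall(SYS_futex, this_futex,
		       FUTEX_SWAP | FUTEX_PRIVATE_FLAG | FUTEX_FLAGS_DTS_MODE,
		       expected, NULL, that_futex, 0);
}
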
Signed-off-by: Zhi Song <hizhisong@gmail.com>
Parent 8871eed8
...@@ -68,6 +68,11 @@ struct task_delay_info; ...@@ -68,6 +68,11 @@ struct task_delay_info;
struct task_group; struct task_group;
struct io_uring_task; struct io_uring_task;
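/*
 * DTS by_pass states: NONE_BY_PASS means the task is not involved in a
 * direct thread switch; INIT_BY_PASS means a switch to the task has been
 * initiated; IN_BY_PASS means the task is running on the donor's shared
 * sched entity; END_BY_PASS means the donated slice expired and the task
 * falls back to normal scheduling.
 */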
#define NONE_BY_PASS 0x0000
#define INIT_BY_PASS 0x0001
#define IN_BY_PASS 0x0002
#define END_BY_PASS 0x0004
/* /*
* Task state bitmask. NOTE! These bits are also * Task state bitmask. NOTE! These bits are also
* encoded in fs/proc/array.c: get_task_state(). * encoded in fs/proc/array.c: get_task_state().
...@@ -500,6 +505,10 @@ struct sched_entity { ...@@ -500,6 +505,10 @@ struct sched_entity {
unsigned long runnable_weight; unsigned long runnable_weight;
#endif #endif
#ifdef CONFIG_DTS
int by_pass;
#endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
* Per entity load average tracking. * Per entity load average tracking.
...@@ -726,6 +735,15 @@ struct task_struct { ...@@ -726,6 +735,15 @@ struct task_struct {
int normal_prio; int normal_prio;
unsigned int rt_priority; unsigned int rt_priority;
#ifdef CONFIG_DTS
/*
* by_pass indicates that the task was launched via direct-thread-switch.
* dts_shared_se is the schedule entity shared with the DTS task.
*/
int by_pass;
struct sched_entity dts_shared_se;
#endif
const struct sched_class *sched_class; const struct sched_class *sched_class;
struct sched_entity se; struct sched_entity se;
struct sched_rt_entity rt; struct sched_rt_entity rt;
...@@ -2194,6 +2212,10 @@ static inline int sched_qos_cpu_overload(void) ...@@ -2194,6 +2212,10 @@ static inline int sched_qos_cpu_overload(void)
} }
#endif #endif
#ifdef CONFIG_DTS
extern int check_task_left_time(struct task_struct *task);
#endif
#ifdef CONFIG_BPF_SCHED #ifdef CONFIG_BPF_SCHED
extern void sched_settag(struct task_struct *tsk, s64 tag); extern void sched_settag(struct task_struct *tsk, s64 tag);
......
...@@ -25,7 +25,14 @@ ...@@ -25,7 +25,14 @@
#define FUTEX_PRIVATE_FLAG 128 #define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256 #define FUTEX_CLOCK_REALTIME 256
#ifdef CONFIG_DTS
#define FUTEX_FLAGS_DTS_MODE 512
#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME | \
FUTEX_FLAGS_DTS_MODE)
#else
#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME) #define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
#endif
#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
...@@ -43,6 +50,7 @@ ...@@ -43,6 +50,7 @@
FUTEX_PRIVATE_FLAG) FUTEX_PRIVATE_FLAG)
#define FUTEX_SWAP_PRIVATE (FUTEX_SWAP | FUTEX_PRIVATE_FLAG) #define FUTEX_SWAP_PRIVATE (FUTEX_SWAP | FUTEX_PRIVATE_FLAG)
/* /*
* Support for robust futexes: the kernel cleans up held futexes at * Support for robust futexes: the kernel cleans up held futexes at
* thread exit time. * thread exit time.
......
...@@ -1261,6 +1261,13 @@ config SCHED_STEAL ...@@ -1261,6 +1261,13 @@ config SCHED_STEAL
If unsure, say N here. If unsure, say N here.
config DTS
bool "Direct Thread Switch"
default y
depends on SCHED_STEAL
help
Enable the direct-thread-switch (DTS) mechanism for the futex_swap operation.
config CHECKPOINT_RESTORE config CHECKPOINT_RESTORE
bool "Checkpoint/restore support" bool "Checkpoint/restore support"
select PROC_CHILDREN select PROC_CHILDREN
......
...@@ -39,11 +39,16 @@ ...@@ -39,11 +39,16 @@
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/fault-inject.h> #include <linux/fault-inject.h>
#include <linux/time_namespace.h> #include <linux/time_namespace.h>
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <asm/futex.h> #include <asm/futex.h>
#include "locking/rtmutex_common.h" #include "locking/rtmutex_common.h"
#ifdef CONFIG_DTS
#include "sched/sched.h"
#endif
/* /*
* READ this before attempting to hack on futexes! * READ this before attempting to hack on futexes!
* *
...@@ -161,7 +166,7 @@ static int __read_mostly futex_cmpxchg_enabled; ...@@ -161,7 +166,7 @@ static int __read_mostly futex_cmpxchg_enabled;
* NOMMU does not have per process address space. Let the compiler optimize * NOMMU does not have per process address space. Let the compiler optimize
* code away. * code away.
*/ */
# define FLAGS_SHARED 0x00 #define FLAGS_SHARED 0x00
#endif #endif
#define FLAGS_CLOCKRT 0x02 #define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04 #define FLAGS_HAS_TIMEOUT 0x04
...@@ -2585,6 +2590,219 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) ...@@ -2585,6 +2590,219 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
return 0; return 0;
} }
#ifdef CONFIG_DTS
static int __direct_thread_switch(struct task_struct *next)
{
int cpu = smp_processor_id();
int success = 1;
struct rq_flags rf;
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq = &rq->cfs;
struct task_struct *prev = rq->curr;
struct sched_entity *prev_se, *next_se;
unsigned long *switch_count = &prev->nvcsw;
unsigned long prev_state;
int next_state;
struct rq *src_rq_next;
bool locked;
preempt_disable();
local_irq_disable();
if (!prev->by_pass) {
prev_se = &prev->se;
} else {
prev_se = &prev->dts_shared_se;
}
next_se = &next->se;
prev->by_pass = NONE_BY_PASS;
next->by_pass = INIT_BY_PASS;
next->dts_shared_se = *prev_se;
prev_se->by_pass = NONE_BY_PASS;
next->dts_shared_se.by_pass = INIT_BY_PASS;
/* task_struct::state is declared volatile; read it only once */
next_state = next->state;
src_rq_next = task_rq(next);
locked = true;
/* Deliver the execution to the callee. */
if (next_state == TASK_RUNNING) {
/* The next task is currently running on a CPU; we cannot switch to it. */
if (task_running(src_rq_next, next)) {
success = 0;
goto end;
}
/* The next task is runnable and may sit on this core's rq or on another core's rq. */
/*
* Dequeue the next task's se (rather than dts_shared_se) to keep fairness and
* consistency. The se is enqueued again when the donated time slice expires.
*/
if (task_rq(next) != rq) {
#ifdef CONFIG_SCHED_STEAL
/* migrate */
if (!steal_task(rq, &rf, &locked, next)) {
success = 0;
goto end;
}
#else
success = 0;
goto end;
#endif
}
replace_shared_entity(cfs_rq, next_se, &next->dts_shared_se);
} else if (next_state == TASK_INTERRUPTIBLE) {
/*
* So far only a next task that went to sleep via futex_swap or futex_wait
* can be woken up here; signals and other interruptible wakeup sources
* still need to be handled.
* Note: the next task is picked from the wait list of the corresponding futex.
*/
/* Enqueue the shared_se and change the state without entering schedule() path. */
if (!wake_up_process_prefer_current_cpu(next)) {
success = 0;
goto end;
}
/* wakeup succeeded: p->state is now TASK_RUNNING */
/* dequeue the shared_se and set cfs_rq->curr = &next->dts_shared_se */
set_next_entity(cfs_rq, &next->dts_shared_se);
} else {
success = 0;
goto end;
}
/* increase rq->cfs.nr_running */
cfs_rq->nr_running++;
sched_submit_work(prev);
rcu_note_context_switch(false);
/*
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller(futex_wait_queue_me) to avoid the race with signal_wake_up():
*
* __set_current_state(@state) signal_wake_up()
* __direct_thread_switch() set_tsk_thread_flag(p, TIF_SIGPENDING)
* wake_up_state(p, state)
* LOCK rq->lock LOCK p->pi_lock
* smp_mb__after_spinlock() smp_mb__after_spinlock()
* if (signal_pending_state()) if (p->state & @state)
*
* Also, the membarrier system call requires a full memory barrier
* after coming from user-space, before storing to rq->curr.
*/
rq_lock(rq, &rf);
smp_mb__after_spinlock();
/*
* We may fail to switch, so do not deactivate the current task before
* processing the next one.
*/
/*
* We must load prev->state once (task_struct::state is volatile), such
* that:
*
* - we form a control dependency vs deactivate_task() below.
* - ptrace_{,un}freeze_traced() can change ->state underneath us.
*/
prev_state = prev->state;
if (prev_state) {
if (signal_pending_state(prev_state, prev)) {
prev->state = TASK_RUNNING;
} else {
prev->sched_contributes_to_load =
(prev_state & TASK_UNINTERRUPTIBLE) &&
!(prev_state & TASK_NOLOAD) &&
!(prev->flags & PF_FROZEN);
if (prev->sched_contributes_to_load)
rq->nr_uninterruptible++;
/*
* __schedule() ttwu()
* prev_state = prev->state; if (p->on_rq && ...)
* if (prev_state) goto out;
* p->on_rq = 0; smp_acquire__after_ctrl_dep();
* p->state = TASK_WAKING
*
* Where __schedule() and ttwu() have matching control dependencies.
*
* After this, schedule() must not care about p->state any more.
*/
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
if (prev->in_iowait) {
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}
}
}
rq->nr_switches++;
/*
* RCU users of rcu_dereference(rq->curr) may not see
* changes to task_struct made by pick_next_task().
*/
RCU_INIT_POINTER(rq->curr, next);
/*
* The membarrier system call requires each architecture
* to have a full memory barrier after updating
* rq->curr, before returning to user-space.
*
* Here are the schemes providing that barrier on the
* various architectures:
* - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
* switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
* - finish_lock_switch() for weakly-ordered
* architectures where spin_unlock is a full barrier,
* - switch_to() for arm64 (weakly-ordered, spin_unlock
* is a RELEASE barrier),
*/
++*switch_count;
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
trace_sched_switch(false, prev, next);
/* drop the reference taken by the earlier get_task_struct() */
put_task_struct(next);
rq = context_switch(rq, prev, next, &rf);
balance_callback(rq);
sched_update_worker(next);
end:
sched_preempt_enable_no_resched();
return success;
}
/*
* Return: 0 on failure, 1 on success.
*/
static int direct_thread_switch(struct task_struct *next)
{
if (next->sched_class != &fair_sched_class ||
current == next) {
return 0;
}
if (!check_task_left_time(current)) {
return 0;
}
return __direct_thread_switch(next);
}
#endif /* CONFIG_DTS */
/** /**
* futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
* @hb: the futex hash bucket, must be locked by the caller * @hb: the futex hash bucket, must be locked by the caller
...@@ -2595,7 +2813,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) ...@@ -2595,7 +2813,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
*/ */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
struct hrtimer_sleeper *timeout, struct hrtimer_sleeper *timeout,
struct task_struct *next) struct task_struct *next, int flags)
{ {
/* /*
* The task state is guaranteed to be set before another task can * The task state is guaranteed to be set before another task can
...@@ -2615,6 +2833,9 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, ...@@ -2615,6 +2833,9 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
* has tried to wake us, and we can skip the call to schedule(). * has tried to wake us, and we can skip the call to schedule().
*/ */
if (likely(!plist_node_empty(&q->list))) { if (likely(!plist_node_empty(&q->list))) {
#ifdef CONFIG_DTS
int do_dts_switch = 0;
#endif
/* /*
* If the timer has already expired, current will already be * If the timer has already expired, current will already be
* flagged for rescheduling. Only call schedule if there * flagged for rescheduling. Only call schedule if there
...@@ -2622,27 +2843,49 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, ...@@ -2622,27 +2843,49 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
*/ */
if (!timeout || timeout->task) { if (!timeout || timeout->task) {
if (next) { if (next) {
#ifdef CONFIG_DTS
/* /*
* wake_up_process() below will be replaced * If we fail to switch to the next task directly, try to switch to
* in the next patch with * the next task in the traditional way.
* wake_up_process_prefer_current_cpu(). *
*/ */
if (flags & FUTEX_FLAGS_DTS_MODE)
do_dts_switch = direct_thread_switch(next);
if (!do_dts_switch)
#endif
{
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
wake_up_process_prefer_current_cpu(next); wake_up_process_prefer_current_cpu(next);
#else #else
wake_up_process(next); wake_up_process(next);
#endif
}
#ifdef CONFIG_DTS
if (!do_dts_switch)
#endif #endif
put_task_struct(next); put_task_struct(next);
next = NULL; next = NULL;
} }
freezable_schedule(); #ifdef CONFIG_DTS
if (!do_dts_switch)
#endif
freezable_schedule();
} }
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
if (next) { if (next) {
#ifdef CONFIG_DTS
/* Fall back to a normal wakeup if DTS is not requested or the switch fails. */
if (!(flags & FUTEX_FLAGS_DTS_MODE) || !direct_thread_switch(next)) {
wake_up_process(next);
put_task_struct(next);
}
#else
wake_up_process(next); wake_up_process(next);
put_task_struct(next); put_task_struct(next);
#endif
} }
} }
...@@ -2743,7 +2986,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ...@@ -2743,7 +2986,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
goto out; goto out;
/* queue_me and wait for wakeup, timeout, or a signal. */ /* queue_me and wait for wakeup, timeout, or a signal. */
futex_wait_queue_me(hb, &q, to, next); futex_wait_queue_me(hb, &q, to, next, flags);
next = NULL; next = NULL;
/* If we were woken (and unqueued), we succeeded, whatever. */ /* If we were woken (and unqueued), we succeeded, whatever. */
...@@ -2819,6 +3062,15 @@ static int futex_swap(u32 __user *uaddr, unsigned int flags, u32 val, ...@@ -2819,6 +3062,15 @@ static int futex_swap(u32 __user *uaddr, unsigned int flags, u32 val,
next->wake_q.next = NULL; next->wake_q.next = NULL;
} }
/* Basic security check: are the two tasks in the same thread group? */
/* Does the current task still have any time slice left to use? */
/*
* The old task will go to sleep and be queued on the futex wait queue;
* meanwhile the new one gets to run.
*/
return futex_wait(uaddr, flags, val, abs_time, bitset, next); return futex_wait(uaddr, flags, val, abs_time, bitset, next);
} }
...@@ -3282,7 +3534,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ...@@ -3282,7 +3534,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
} }
/* Queue the futex_q, drop the hb lock, wait for wakeup. */ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to, NULL); futex_wait_queue_me(hb, &q, to, NULL, flags);
spin_lock(&hb->lock); spin_lock(&hb->lock);
ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
...@@ -3768,6 +4020,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ...@@ -3768,6 +4020,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
int cmd = op & FUTEX_CMD_MASK; int cmd = op & FUTEX_CMD_MASK;
unsigned int flags = 0; unsigned int flags = 0;
#ifdef CONFIG_DTS
if (op & FUTEX_FLAGS_DTS_MODE) {
flags |= FUTEX_FLAGS_DTS_MODE;
}
#endif
if (!(op & FUTEX_PRIVATE_FLAG)) if (!(op & FUTEX_PRIVATE_FLAG))
flags |= FLAGS_SHARED; flags |= FLAGS_SHARED;
......
...@@ -2469,7 +2469,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) ...@@ -2469,7 +2469,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
struct rq_flags *rf) struct rq_flags *rf)
{ {
check_preempt_curr(rq, p, wake_flags); #ifdef CONFIG_DTS
if (p->by_pass != INIT_BY_PASS)
#endif
check_preempt_curr(rq, p, wake_flags);
p->state = TASK_RUNNING; p->state = TASK_RUNNING;
trace_sched_wakeup(p); trace_sched_wakeup(p);
...@@ -2996,7 +3000,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ...@@ -2996,7 +3000,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
out: out:
if (success) if (success)
ttwu_stat(p, task_cpu(p), wake_flags); ttwu_stat(p, task_cpu(p), wake_flags);
preempt_enable(); #ifdef CONFIG_DTS
if (p->by_pass == INIT_BY_PASS) {
p->by_pass = IN_BY_PASS;
p->se.by_pass = IN_BY_PASS;
p->dts_shared_se.by_pass = IN_BY_PASS;
preempt_enable_no_resched();
}
else
#endif
preempt_enable();
return success; return success;
} }
...@@ -3086,6 +3099,16 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -3086,6 +3099,16 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.vruntime = 0; p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node); INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_DTS
p->dts_shared_se.on_rq = 0;
p->dts_shared_se.exec_start = 0;
p->dts_shared_se.sum_exec_runtime = 0;
p->dts_shared_se.prev_sum_exec_runtime = 0;
p->dts_shared_se.nr_migrations = 0;
p->dts_shared_se.vruntime = 0;
INIT_LIST_HEAD(&p->dts_shared_se.group_node);
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL; p->se.cfs_rq = NULL;
#endif #endif
...@@ -3315,6 +3338,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -3315,6 +3338,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
init_entity_runnable_average(&p->se); init_entity_runnable_average(&p->se);
#ifdef CONFIG_DTS
p->by_pass = NONE_BY_PASS;
p->se.by_pass = NONE_BY_PASS;
p->dts_shared_se.by_pass = NONE_BY_PASS;
#endif
#ifdef CONFIG_SCHED_INFO #ifdef CONFIG_SCHED_INFO
if (likely(sched_info_on())) if (likely(sched_info_on()))
...@@ -3702,6 +3730,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) ...@@ -3702,6 +3730,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm); membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm); mmdrop(mm);
} }
#ifdef CONFIG_DTS
prev->by_pass = NONE_BY_PASS;
prev->se.by_pass = NONE_BY_PASS;
prev->dts_shared_se.by_pass = NONE_BY_PASS;
#endif
if (unlikely(prev_state == TASK_DEAD)) { if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead) if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev); prev->sched_class->task_dead(prev);
...@@ -3744,7 +3777,7 @@ static void __balance_callback(struct rq *rq) ...@@ -3744,7 +3777,7 @@ static void __balance_callback(struct rq *rq)
raw_spin_unlock_irqrestore(&rq->lock, flags); raw_spin_unlock_irqrestore(&rq->lock, flags);
} }
static inline void balance_callback(struct rq *rq) inline void balance_callback(struct rq *rq)
{ {
if (unlikely(rq->balance_callback)) if (unlikely(rq->balance_callback))
__balance_callback(rq); __balance_callback(rq);
...@@ -3752,7 +3785,7 @@ static inline void balance_callback(struct rq *rq) ...@@ -3752,7 +3785,7 @@ static inline void balance_callback(struct rq *rq)
#else #else
static inline void balance_callback(struct rq *rq) inline void balance_callback(struct rq *rq)
{ {
} }
...@@ -3789,7 +3822,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) ...@@ -3789,7 +3822,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
/* /*
* context_switch - switch to the new MM and the new thread's register state. * context_switch - switch to the new MM and the new thread's register state.
*/ */
static __always_inline struct rq * __always_inline struct rq *
context_switch(struct rq *rq, struct task_struct *prev, context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next, struct rq_flags *rf) struct task_struct *next, struct rq_flags *rf)
{ {
...@@ -3846,7 +3879,7 @@ context_switch(struct rq *rq, struct task_struct *prev, ...@@ -3846,7 +3879,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
barrier(); barrier();
return finish_task_switch(prev); return finish_task_switch(prev);
} }EXPORT_SYMBOL(context_switch);
/* /*
* nr_running and nr_context_switches: * nr_running and nr_context_switches:
...@@ -4615,7 +4648,7 @@ void __noreturn do_task_dead(void) ...@@ -4615,7 +4648,7 @@ void __noreturn do_task_dead(void)
cpu_relax(); cpu_relax();
} }
static inline void sched_submit_work(struct task_struct *tsk) inline void sched_submit_work(struct task_struct *tsk)
{ {
unsigned int task_flags; unsigned int task_flags;
...@@ -4651,7 +4684,7 @@ static inline void sched_submit_work(struct task_struct *tsk) ...@@ -4651,7 +4684,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
blk_schedule_flush_plug(tsk); blk_schedule_flush_plug(tsk);
} }
static void sched_update_worker(struct task_struct *tsk) void sched_update_worker(struct task_struct *tsk)
{ {
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
if (tsk->flags & PF_WQ_WORKER) if (tsk->flags & PF_WQ_WORKER)
......
...@@ -572,6 +572,28 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) ...@@ -572,6 +572,28 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less); rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
} }
static void __traverse_cfs_rq(struct cfs_rq *cfs_rq, struct rb_node **node)
{
struct sched_entity *entry;
if (!*node) {
printk("TREE END\n");
return;
}
entry = rb_entry(*node, struct sched_entity, run_node);
__traverse_cfs_rq(cfs_rq, &(*node)->rb_left);
printk("%p\n", entry);
__traverse_cfs_rq(cfs_rq, &(*node)->rb_right);
}
void traverse_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node;
__traverse_cfs_rq(cfs_rq, link);
}
static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{ {
rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline); rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
...@@ -2982,7 +3004,7 @@ adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *), ...@@ -2982,7 +3004,7 @@ adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
} }
#endif #endif
static void void
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{ {
update_load_add(&cfs_rq->load, se->load.weight); update_load_add(&cfs_rq->load, se->load.weight);
...@@ -4340,7 +4362,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) ...@@ -4340,7 +4362,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_stats_enqueue(cfs_rq, se, flags); update_stats_enqueue(cfs_rq, se, flags);
check_spread(cfs_rq, se); check_spread(cfs_rq, se);
if (!curr) if (!curr)
__enqueue_entity(cfs_rq, se); #ifdef CONFIG_DTS
if (se->by_pass != INIT_BY_PASS)
#endif
__enqueue_entity(cfs_rq, se);
se->on_rq = 1; se->on_rq = 1;
/* /*
...@@ -4463,6 +4489,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ...@@ -4463,6 +4489,12 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
unsigned long ideal_runtime, delta_exec; unsigned long ideal_runtime, delta_exec;
struct sched_entity *se; struct sched_entity *se;
s64 delta; s64 delta;
#ifdef CONFIG_DTS
struct task_struct *curr_task = NULL;
if (entity_is_task(curr) && curr->by_pass != NONE_BY_PASS)
curr_task = task_of_dts_shared_se(curr);
#endif
ideal_runtime = sched_slice(cfs_rq, curr); ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
...@@ -4488,7 +4520,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ...@@ -4488,7 +4520,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
* re-elected due to buddy favours. * re-elected due to buddy favours.
*/ */
clear_buddies(cfs_rq, curr); clear_buddies(cfs_rq, curr);
return; goto end;
} }
/* /*
...@@ -4497,19 +4529,72 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ...@@ -4497,19 +4529,72 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
* This also mitigates buddy induced latencies under load. * This also mitigates buddy induced latencies under load.
*/ */
if (delta_exec < sysctl_sched_min_granularity) if (delta_exec < sysctl_sched_min_granularity)
return; goto end;
se = __pick_first_entity(cfs_rq); se = __pick_first_entity(cfs_rq);
delta = curr->vruntime - se->vruntime; delta = curr->vruntime - se->vruntime;
if (delta < 0) if (delta < 0)
return; goto end;
if (delta > ideal_runtime) if (delta > ideal_runtime) {
resched_curr(rq_of(cfs_rq)); resched_curr(rq_of(cfs_rq));
goto end;
} else {
return;
}
end:
#ifdef CONFIG_DTS
if (curr_task) {
curr_task->by_pass = END_BY_PASS;
curr_task->se.by_pass = END_BY_PASS;
curr_task->dts_shared_se.by_pass = END_BY_PASS;
}
#endif
} }
static void #ifdef CONFIG_DTS
/*
* We dequeue the task's original se but do NOT change any of its scheduling
* information. Correspondingly, the original se is enqueued again, unchanged,
* when the shared se expires. // TODO
* Acquiring the shared se's stats etc. still needs to be fixed for tasks running in DTS mode. // TODO
*/
void
replace_shared_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, struct sched_entity *shared_se)
{
if (shared_se->on_rq) {
/*
* Any task has to be enqueued before it get to execute on
* a CPU. So account for the time it spent waiting on the
* runqueue.
*/
// TODO
update_stats_wait_end(cfs_rq, shared_se);
__dequeue_entity(cfs_rq, se); /* the se of next task should be dequeued */
update_load_avg(cfs_rq, shared_se, UPDATE_TG);
}
update_stats_curr_start(cfs_rq, shared_se);
cfs_rq->curr = shared_se; /* subsequent update_curr() calls operate on cfs_rq->curr */
/*
* Track our maximum slice length, if the CPU's load is at
* least twice that of our own weight (i.e. dont track it
* when there are only lesser-weight tasks around):
*/
if (schedstat_enabled() &&
rq_of(cfs_rq)->cfs.load.weight >= 2*shared_se->load.weight) {
schedstat_set(shared_se->statistics.slice_max,
max((u64)schedstat_val(shared_se->statistics.slice_max),
shared_se->sum_exec_runtime - shared_se->prev_sum_exec_runtime));
}
shared_se->prev_sum_exec_runtime = shared_se->sum_exec_runtime;
}
#endif
void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{ {
/* 'current' is not kept within the tree. */ /* 'current' is not kept within the tree. */
...@@ -4605,8 +4690,15 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) ...@@ -4605,8 +4690,15 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
/* under the DTS mechanism, prev may be the task's own se or the shared se */
static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
{ {
#ifdef CONFIG_DTS
struct task_struct *task = NULL;
if (entity_is_task(prev))
task = task_of(prev);
#endif
/* /*
* If still on the runqueue then deactivate_task() * If still on the runqueue then deactivate_task()
* was not called and update_curr() has to be done: * was not called and update_curr() has to be done:
...@@ -4627,6 +4719,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) ...@@ -4627,6 +4719,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
update_load_avg(cfs_rq, prev, 0); update_load_avg(cfs_rq, prev, 0);
} }
cfs_rq->curr = NULL; cfs_rq->curr = NULL;
#ifdef CONFIG_DTS
if (task && task->by_pass == END_BY_PASS) {
task->by_pass = NONE_BY_PASS;
task->se.by_pass = NONE_BY_PASS;
task->dts_shared_se.by_pass = NONE_BY_PASS;
}
#endif
} }
static void static void
...@@ -5630,6 +5729,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -5630,6 +5729,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int task_new = !(flags & ENQUEUE_WAKEUP); int task_new = !(flags & ENQUEUE_WAKEUP);
unsigned int prev_nr = rq->cfs.h_nr_running; unsigned int prev_nr = rq->cfs.h_nr_running;
#ifdef CONFIG_DTS
if (p->by_pass != NONE_BY_PASS) {
se = &p->dts_shared_se;
}
#endif
/* /*
* The code below (indirectly) updates schedutil which looks at * The code below (indirectly) updates schedutil which looks at
* the cfs_rq utilization to select a frequency. * the cfs_rq utilization to select a frequency.
...@@ -5737,11 +5842,17 @@ static void set_next_buddy(struct sched_entity *se); ...@@ -5737,11 +5842,17 @@ static void set_next_buddy(struct sched_entity *se);
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{ {
struct cfs_rq *cfs_rq; struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se; struct sched_entity *se;
int task_sleep = flags & DEQUEUE_SLEEP; int task_sleep = flags & DEQUEUE_SLEEP;
int idle_h_nr_running = task_has_idle_policy(p); int idle_h_nr_running = task_has_idle_policy(p);
unsigned int prev_nr = rq->cfs.h_nr_running; unsigned int prev_nr = rq->cfs.h_nr_running;
bool was_sched_idle = sched_idle_rq(rq); bool was_sched_idle = sched_idle_rq(rq);
#ifdef CONFIG_DTS
if (p->by_pass != NONE_BY_PASS)
se = &p->dts_shared_se;
else
#endif
se = &p->se;
util_est_dequeue(&rq->cfs, p); util_est_dequeue(&rq->cfs, p);
...@@ -7159,11 +7270,28 @@ static void set_skip_buddy(struct sched_entity *se) ...@@ -7159,11 +7270,28 @@ static void set_skip_buddy(struct sched_entity *se)
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
{ {
struct task_struct *curr = rq->curr; struct task_struct *curr = rq->curr;
struct sched_entity *se = &curr->se, *pse = &p->se; struct sched_entity *se, *pse;
struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct cfs_rq *cfs_rq = task_cfs_rq(curr);
int scale = cfs_rq->nr_running >= sched_nr_latency; int scale = cfs_rq->nr_running >= sched_nr_latency;
int next_buddy_marked = 0; int next_buddy_marked = 0;
#ifdef CONFIG_DTS
int curr_by_pass = curr->by_pass;
int p_by_pass = p->by_pass;
if (curr_by_pass != NONE_BY_PASS)
se = &curr->dts_shared_se;
else
#endif
se = &curr->se;
#ifdef CONFIG_DTS
if (p_by_pass != NONE_BY_PASS)
pse = &p->dts_shared_se;
else
#endif
pse = &p->se;
if (unlikely(se == pse)) if (unlikely(se == pse))
return; return;
...@@ -7718,13 +7846,25 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf ...@@ -7718,13 +7846,25 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
p = task_of(se); p = task_of(se);
if (se == NULL) {
printk("CFS_RQ Nr_running: %d\n", rq->cfs.nr_running);
printk("RQ Nr_running: %d\n", rq->nr_running);
}
/* /*
* Since we haven't yet done put_prev_entity and if the selected task * Since we haven't yet done put_prev_entity and if the selected task
* is a different task than we started out with, try and touch the * is a different task than we started out with, try and touch the
* least amount of cfs_rqs. * least amount of cfs_rqs.
*/ */
if (prev != p) { if (prev != p) {
struct sched_entity *pse = &prev->se; struct sched_entity *pse;
#ifdef CONFIG_DTS
if (prev->by_pass != NONE_BY_PASS)
pse = &prev->dts_shared_se;
else
#endif
pse = &prev->se;
while (!(cfs_rq = is_same_group(se, pse))) { while (!(cfs_rq = is_same_group(se, pse))) {
int se_depth = se->depth; int se_depth = se->depth;
...@@ -7877,8 +8017,15 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq) ...@@ -7877,8 +8017,15 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
*/ */
static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
{ {
struct sched_entity *se = &prev->se; struct sched_entity *se;
struct cfs_rq *cfs_rq; struct cfs_rq *cfs_rq;
#ifdef CONFIG_DTS
if (prev->by_pass != NONE_BY_PASS)
se = &prev->dts_shared_se;
else
#endif
se = &prev->se;
for_each_sched_entity(se) { for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se); cfs_rq = cfs_rq_of(se);
...@@ -7895,7 +8042,13 @@ static void yield_task_fair(struct rq *rq) ...@@ -7895,7 +8042,13 @@ static void yield_task_fair(struct rq *rq)
{ {
struct task_struct *curr = rq->curr; struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se; struct sched_entity *se;
#ifdef CONFIG_DTS
if (curr->by_pass != NONE_BY_PASS)
se = &curr->dts_shared_se;
else
#endif
se = &curr->se;
/* /*
* Are we the only task in the tree? * Are we the only task in the tree?
...@@ -7926,6 +8079,13 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) ...@@ -7926,6 +8079,13 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
{ {
struct sched_entity *se = &p->se; struct sched_entity *se = &p->se;
#ifdef CONFIG_DTS
/* DTS tasks do not support being run via the yield_to path. */
if (p->by_pass != NONE_BY_PASS) {
return false;
}
#endif
/* throttled hierarchies are not runnable */ /* throttled hierarchies are not runnable */
if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se))) if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
return false; return false;
...@@ -8363,7 +8523,7 @@ can_migrate_task_llc(struct task_struct *p, struct rq *rq, struct rq *dst_rq) ...@@ -8363,7 +8523,7 @@ can_migrate_task_llc(struct task_struct *p, struct rq *rq, struct rq *dst_rq)
/* /*
* detach_task() -- detach the task for the migration from @src_rq to @dst_cpu. * detach_task() -- detach the task for the migration from @src_rq to @dst_cpu.
*/ */
static void detach_task(struct task_struct *p, struct rq *src_rq, int dst_cpu) void detach_task(struct task_struct *p, struct rq *src_rq, int dst_cpu)
{ {
lockdep_assert_held(&src_rq->lock); lockdep_assert_held(&src_rq->lock);
...@@ -8573,6 +8733,10 @@ static void attach_task(struct rq *rq, struct task_struct *p) ...@@ -8573,6 +8733,10 @@ static void attach_task(struct rq *rq, struct task_struct *p)
BUG_ON(task_rq(p) != rq); BUG_ON(task_rq(p) != rq);
activate_task(rq, p, ENQUEUE_NOCLOCK); activate_task(rq, p, ENQUEUE_NOCLOCK);
#ifdef CONFIG_DTS
if (p->by_pass != INIT_BY_PASS)
#endif
check_preempt_curr(rq, p, 0); check_preempt_curr(rq, p, 0);
} }
...@@ -11544,6 +11708,53 @@ static int steal_from(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked, ...@@ -11544,6 +11708,53 @@ static int steal_from(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
return stolen; return stolen;
} }
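/* Pull @tsk from its current rq onto @dst_rq; a targeted variant of steal_from(). */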
int steal_task(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
struct task_struct *tsk)
{
struct rq_flags rf;
int stolen = 0;
int dst_cpu = dst_rq->cpu;
struct rq *src_rq = task_rq(tsk);
int src_cpu = task_cpu(tsk);
if (!steal_enabled())
return 0;
if (!cpu_active(dst_cpu))
return 0;
if (dst_cpu == src_cpu)
return 0;
if (*locked) {
rq_unpin_lock(dst_rq, dst_rf);
raw_spin_unlock(&dst_rq->lock);
*locked = false;
}
rq_lock_irqsave(src_rq, &rf);
update_rq_clock(src_rq);
if (!cpu_active(src_cpu))
tsk = NULL;
else
detach_task(tsk, src_rq, dst_cpu);
rq_unlock(src_rq, &rf);
if (tsk) {
raw_spin_lock(&dst_rq->lock);
rq_repin_lock(dst_rq, dst_rf);
*locked = true;
update_rq_clock(dst_rq);
attach_task(dst_rq, tsk);
stolen = 1;
schedstat_inc(dst_rq->steal);
}
local_irq_restore(rf.flags);
return stolen;
}
/* /*
* Conservative upper bound on the max cost of a steal, in nsecs (the typical * Conservative upper bound on the max cost of a steal, in nsecs (the typical
* cost is 1-2 microsec). Do not steal if average idle time is less. * cost is 1-2 microsec). Do not steal if average idle time is less.
...@@ -11653,6 +11864,12 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) ...@@ -11653,6 +11864,12 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
struct cfs_rq *cfs_rq; struct cfs_rq *cfs_rq;
struct sched_entity *se = &curr->se; struct sched_entity *se = &curr->se;
#ifdef CONFIG_DTS
if (curr->by_pass != NONE_BY_PASS) {
se = &curr->dts_shared_se;
}
#endif
for_each_sched_entity(se) { for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se); cfs_rq = cfs_rq_of(se);
entity_tick(cfs_rq, se, queued); entity_tick(cfs_rq, se, queued);
...@@ -12148,6 +12365,81 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task ...@@ -12148,6 +12365,81 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
return rr_interval; return rr_interval;
} }
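/* Update the runqueue clock, load average and runtime statistics of the current entity. */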
void update_before_bypass(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
struct sched_entity *curr;
struct cfs_rq *cfs_rq;
#ifdef CONFIG_DTS
if (current->by_pass != NONE_BY_PASS)
curr = &current->dts_shared_se;
else
#endif
curr = &current->se;
cfs_rq = cfs_rq_of(curr);
rq_lock(rq, &rf);
update_rq_clock(rq);
/*
* Ensure that runnable average is periodically updated.
*/
update_load_avg(cfs_rq, curr, UPDATE_TG);
update_cfs_group(curr);
/*
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
/*
* Ensure that runnable average is periodically updated.
*/
update_load_avg(cfs_rq, curr, UPDATE_TG);
update_cfs_group(curr);
rq_unlock(rq, &rf);
}
/*
* Return: 1 if the task still has time slice left, 0 otherwise.
*/
int check_task_left_time(struct task_struct *task)
{
unsigned long ideal_runtime, delta_exec;
struct sched_entity *se;
struct cfs_rq *cfs_rq;
#ifdef CONFIG_DTS
if (task->by_pass != NONE_BY_PASS)
se = &task->dts_shared_se;
else
#endif
se = &task->se;
cfs_rq = cfs_rq_of(se);
ideal_runtime = sched_slice(cfs_rq, se);
delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
if (cfs_rq->nr_running > 1) {
resched_curr(rq_of(cfs_rq));
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
*/
clear_buddies(cfs_rq, se);
}
return 0;
}
return 1;
}
/* /*
* All the scheduling class methods: * All the scheduling class methods:
*/ */
......
...@@ -534,6 +534,12 @@ extern void sched_offline_group(struct task_group *tg); ...@@ -534,6 +534,12 @@ extern void sched_offline_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk); extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_DTS
extern void replace_shared_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev_se, struct sched_entity *shared_se);
#endif
extern void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se);
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
...@@ -1185,9 +1191,22 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ...@@ -1185,9 +1191,22 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define raw_rq() raw_cpu_ptr(&runqueues) #define raw_rq() raw_cpu_ptr(&runqueues)
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_DTS
static inline struct task_struct *task_of_dts_shared_se(struct sched_entity *dts_shared_se)
{
SCHED_WARN_ON(!entity_is_task(dts_shared_se));
return container_of(dts_shared_se, struct task_struct, dts_shared_se);
}
#endif
static inline struct task_struct *task_of(struct sched_entity *se) static inline struct task_struct *task_of(struct sched_entity *se)
{ {
SCHED_WARN_ON(!entity_is_task(se)); SCHED_WARN_ON(!entity_is_task(se));
#ifdef CONFIG_DTS
if (se->by_pass != NONE_BY_PASS)
return task_of_dts_shared_se(se);
else
#endif
return container_of(se, struct task_struct, se); return container_of(se, struct task_struct, se);
} }
...@@ -1210,8 +1229,28 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) ...@@ -1210,8 +1229,28 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
#else #else
#ifdef CONFIG_DTS
static inline struct task_struct *task_of_dts_shared_se(struct sched_entity *dts_shared_se)
{
return container_of(dts_shared_se, struct task_struct, dts_shared_se);
}
static inline struct cfs_rq *cfs_rq_of_dts_shared_se(struct sched_entity *se)
{
struct task_struct *p = task_of_dts_shared_se(se);
struct rq *rq = task_rq(p);
return &rq->cfs;
}
#endif
static inline struct task_struct *task_of(struct sched_entity *se) static inline struct task_struct *task_of(struct sched_entity *se)
{ {
#ifdef CONFIG_DTS
if (se->by_pass != NONE_BY_PASS)
return task_of_dts_shared_se(se);
else
#endif
return container_of(se, struct task_struct, se); return container_of(se, struct task_struct, se);
} }
...@@ -1220,7 +1259,7 @@ static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) ...@@ -1220,7 +1259,7 @@ static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
return &task_rq(p)->cfs; return &task_rq(p)->cfs;
} }
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) static inline struct cfs_rq *cfs_rq_of_se(struct sched_entity *se)
{ {
struct task_struct *p = task_of(se); struct task_struct *p = task_of(se);
struct rq *rq = task_rq(p); struct rq *rq = task_rq(p);
...@@ -1228,6 +1267,17 @@ static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) ...@@ -1228,6 +1267,17 @@ static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
return &rq->cfs; return &rq->cfs;
} }
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
#ifdef CONFIG_DTS
if (se->by_pass != NONE_BY_PASS)
return cfs_rq_of_dts_shared_se(se);
else
#endif
return cfs_rq_of_se(se);
}
/* runqueue "owned" by this group */ /* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{ {
...@@ -2404,6 +2454,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) ...@@ -2404,6 +2454,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
extern void traverse_cfs_rq(struct cfs_rq *cfs_rq);
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
extern bool sched_debug_enabled; extern bool sched_debug_enabled;
...@@ -2789,3 +2840,18 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) ...@@ -2789,3 +2840,18 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
void swake_up_all_locked(struct swait_queue_head *q); void swake_up_all_locked(struct swait_queue_head *q);
void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
#ifdef CONFIG_DTS
extern void sched_submit_work(struct task_struct *tsk);
extern void sched_update_worker(struct task_struct *tsk);
extern struct rq *context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next, struct rq_flags *rf);
extern void
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se);
#ifdef CONFIG_SCHED_STEAL
extern int steal_task(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
struct task_struct *tsk);
extern void update_before_bypass(void);
extern void balance_callback(struct rq *rq);
#endif
#endif
\ No newline at end of file
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
/* The futex the main thread waits on. */ /* The futex the main thread waits on. */
futex_t futex_main = FUTEX_INITIALIZER; futex_t futex_main = FUTEX_INITIALIZER;
/* The futex the other thread wats on. */ /* The futex the other thread waits on. */
futex_t futex_other = FUTEX_INITIALIZER; futex_t futex_other = FUTEX_INITIALIZER;
/* The number of iterations to run (>1 => run benchmarks. */ /* The number of iterations to run (>1 => run benchmarks. */
static int cfg_iterations = 1; static int cfg_iterations = 5;
/* If != 0, print diagnostic messages. */ /* If != 0, print diagnostic messages. */
static int cfg_verbose; static int cfg_verbose;
...@@ -28,17 +28,21 @@ static int cfg_validate = 1; ...@@ -28,17 +28,21 @@ static int cfg_validate = 1;
/* How to swap threads. */ /* How to swap threads. */
#define SWAP_WAKE_WAIT 1 #define SWAP_WAKE_WAIT 1
#define SWAP_SWAP 2 #define SWAP_SWAP 2
#define SWAP_SWAP_DTS 4
/* Futex values. */ /* Futex values. */
#define FUTEX_WAITING 0 #define FUTEX_WAITING 0
#define FUTEX_WAKEUP 1 #define FUTEX_WAKEUP 1
#define FUTEX_FLAGS_DTS_MODE 512
/* An atomic counter used to validate proper swapping. */ /* An atomic counter used to validate proper swapping. */
static atomic_t validation_counter; static atomic_t validation_counter;
void futex_swap_op(int mode, futex_t *futex_this, futex_t *futex_that) void futex_swap_op(int mode, futex_t *futex_this, futex_t *futex_that)
{ {
int ret; int ret;
int flags = 0;
switch (mode) { switch (mode) {
case SWAP_WAKE_WAIT: case SWAP_WAKE_WAIT:
...@@ -52,11 +56,14 @@ void futex_swap_op(int mode, futex_t *futex_this, futex_t *futex_that) ...@@ -52,11 +56,14 @@ void futex_swap_op(int mode, futex_t *futex_this, futex_t *futex_that)
} }
break; break;
case SWAP_SWAP_DTS:
flags |= FUTEX_FLAGS_DTS_MODE;
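/* fall through: SWAP_SWAP_DTS also takes the SWAP_SWAP path below */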
case SWAP_SWAP: case SWAP_SWAP:
flags |= FUTEX_PRIVATE_FLAG;
futex_set(futex_this, FUTEX_WAITING); futex_set(futex_this, FUTEX_WAITING);
futex_set(futex_that, FUTEX_WAKEUP); futex_set(futex_that, FUTEX_WAKEUP);
ret = futex_swap(futex_this, FUTEX_WAITING, NULL, ret = futex_swap(futex_this, FUTEX_WAITING, NULL,
futex_that, FUTEX_PRIVATE_FLAG); futex_that, flags);
if (ret < 0 && errno == ENOSYS) { if (ret < 0 && errno == ENOSYS) {
/* futex_swap not implemented */ /* futex_swap not implemented */
perror("futex_swap"); perror("futex_swap");
...@@ -171,13 +178,14 @@ void usage(char *prog) ...@@ -171,13 +178,14 @@ void usage(char *prog)
printf(" -i N Use N iterations to benchmark\n"); printf(" -i N Use N iterations to benchmark\n");
printf(" -n Do not validate swapping correctness\n"); printf(" -n Do not validate swapping correctness\n");
printf(" -v Print diagnostic messages\n"); printf(" -v Print diagnostic messages\n");
printf(" -d Benchmark with the direct-thread-switch(DTS) mechanism\n");
} }
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int c; int c;
while ((c = getopt(argc, argv, "hi:nv")) != -1) { while ((c = getopt(argc, argv, "hi:nvd")) != -1) {
switch (c) { switch (c) {
case 'h': case 'h':
usage(basename(argv[0])); usage(basename(argv[0]));
...@@ -191,6 +199,9 @@ int main(int argc, char *argv[]) ...@@ -191,6 +199,9 @@ int main(int argc, char *argv[])
case 'v': case 'v':
cfg_verbose = 1; cfg_verbose = 1;
break; break;
case 'd':
goto dts_test;
break;
default: default:
usage(basename(argv[0])); usage(basename(argv[0]));
exit(1); exit(1);
...@@ -205,5 +216,10 @@ int main(int argc, char *argv[]) ...@@ -205,5 +216,10 @@ int main(int argc, char *argv[])
run_test(SWAP_SWAP); run_test(SWAP_SWAP);
printf("PASS\n"); printf("PASS\n");
dts_test:
printf("\n\n---- running SWAP_SWAP with the direct-thread-switch(DTS) mechanism ----\n\n");
run_test(SWAP_SWAP_DTS);
printf("PASS\n");
return 0; return 0;
} }