提交 65b2074f 编写于 作者: L Linus Torvalds

Merge branch 'sched-core-for-linus' of...

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (30 commits)
  sched: Change wait_for_completion_*_timeout() to return a signed long
  sched, autogroup: Fix reference leak
  sched, autogroup: Fix potential access to freed memory
  sched: Remove redundant CONFIG_CGROUP_SCHED ifdef
  sched: Fix interactivity bug by charging unaccounted run-time on entity re-weight
  sched: Move periodic share updates to entity_tick()
  printk: Use this_cpu_{read|write} api on printk_pending
  sched: Make pushable_tasks CONFIG_SMP dependant
  sched: Add 'autogroup' scheduling feature: automated per session task groups
  sched: Fix unregister_fair_sched_group()
  sched: Remove unused argument dest_cpu to migrate_task()
  mutexes, sched: Introduce arch_mutex_cpu_relax()
  sched: Add some clock info to sched_debug
  cpu: Remove incorrect BUG_ON
  cpu: Remove unused variable
  sched: Fix UP build breakage
  sched: Make task dump print all 15 chars of proc comm
  sched: Update tg->shares after cpu.shares write
  sched: Allow update_cfs_load() to update global load
  sched: Implement demand based update_cfs_load()
  ...
......@@ -1614,6 +1614,8 @@ and is between 256 and 4096 characters. It is defined in the file
noapic [SMP,APIC] Tells the kernel to not make use of any
IOAPICs that may be present in the system.
noautogroup Disable scheduler automatic task group creation.
nobats [PPC] Do not use BATs for mapping kernel lowmem
on "Classic" PPC cores.
......
......@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
config HAVE_ARCH_JUMP_LABEL
bool
config HAVE_ARCH_MUTEX_CPU_RELAX
bool
source "kernel/gcov/Kconfig"
......@@ -99,6 +99,7 @@ config S390
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_GET_USER_PAGES_FAST
select HAVE_ARCH_MUTEX_CPU_RELAX
select ARCH_INLINE_SPIN_TRYLOCK
select ARCH_INLINE_SPIN_TRYLOCK_BH
select ARCH_INLINE_SPIN_LOCK
......
......@@ -7,3 +7,5 @@
*/
#include <asm-generic/mutex-dec.h>
#define arch_mutex_cpu_relax() barrier()
......@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
*/
static int sched_autogroup_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
struct task_struct *p;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
proc_sched_autogroup_show_task(p, m);
put_task_struct(p);
return 0;
}
static ssize_t
sched_autogroup_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct task_struct *p;
char buffer[PROC_NUMBUF];
long nice;
int err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
err = strict_strtol(strstrip(buffer), 0, &nice);
if (err)
return -EINVAL;
p = get_proc_task(inode);
if (!p)
return -ESRCH;
err = nice;
err = proc_sched_autogroup_set_nice(p, &err);
if (err)
count = err;
put_task_struct(p);
return count;
}
static int sched_autogroup_open(struct inode *inode, struct file *filp)
{
int ret;
ret = single_open(filp, sched_autogroup_show, NULL);
if (!ret) {
struct seq_file *m = filp->private_data;
m->private = inode;
}
return ret;
}
static const struct file_operations proc_pid_sched_autogroup_operations = {
.open = sched_autogroup_open,
.read = seq_read,
.write = sched_autogroup_write,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_SCHED_AUTOGROUP */
static ssize_t comm_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
......@@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = {
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
......
......@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x);
extern int wait_for_completion_killable(struct completion *x);
extern unsigned long wait_for_completion_timeout(struct completion *x,
unsigned long timeout);
extern unsigned long wait_for_completion_interruptible_timeout(
struct completion *x, unsigned long timeout);
extern unsigned long wait_for_completion_killable_timeout(
struct completion *x, unsigned long timeout);
extern long wait_for_completion_interruptible_timeout(
struct completion *x, unsigned long timeout);
extern long wait_for_completion_killable_timeout(
struct completion *x, unsigned long timeout);
extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);
......
......@@ -12,6 +12,13 @@
#include <linux/securebits.h>
#include <net/net_namespace.h>
#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
#else
# define INIT_PUSHABLE_TASKS(tsk)
#endif
extern struct files_struct init_files;
extern struct fs_struct init_fs;
......@@ -144,7 +151,7 @@ extern struct cred init_cred;
.nr_cpus_allowed = NR_CPUS, \
}, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
INIT_PUSHABLE_TASKS(tsk) \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
......
......@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock);
extern void mutex_unlock(struct mutex *lock);
extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
#define arch_mutex_cpu_relax() cpu_relax()
#endif
#endif
......@@ -513,6 +513,8 @@ struct thread_group_cputimer {
spinlock_t lock;
};
struct autogroup;
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
......@@ -580,6 +582,9 @@ struct signal_struct {
struct tty_struct *tty; /* NULL if no tty */
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup *autogroup;
#endif
/*
* Cumulative resource counters for dead threads in the group,
* and for reaped dead child processes forked by this group.
......@@ -1242,7 +1247,9 @@ struct task_struct {
#endif
struct list_head tasks;
#ifdef CONFIG_SMP
struct plist_node pushable_tasks;
#endif
struct mm_struct *mm, *active_mm;
#if defined(SPLIT_RSS_COUNTING)
......@@ -1883,14 +1890,11 @@ extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#ifdef CONFIG_HOTPLUG_CPU
extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
extern void idle_task_exit(void);
#else
static inline void idle_task_exit(void) {}
#endif
extern void sched_idle_next(void);
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
extern void wake_up_idle_cpu(int cpu);
#else
......@@ -1900,8 +1904,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_shares_ratelimit;
extern unsigned int sysctl_sched_shares_thresh;
extern unsigned int sysctl_sched_child_runs_first;
enum sched_tunable_scaling {
......@@ -1917,6 +1919,7 @@ extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
......@@ -1942,6 +1945,24 @@ int sched_rt_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_compat_yield;
#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
extern void sched_autogroup_exit(struct signal_struct *sig);
#ifdef CONFIG_PROC_FS
extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
#endif
#else
static inline void sched_autogroup_create_attach(struct task_struct *p) { }
static inline void sched_autogroup_detach(struct task_struct *p) { }
static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
......@@ -1960,9 +1981,10 @@ extern int task_nice(const struct task_struct *p);
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
extern int sched_setscheduler(struct task_struct *, int,
const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int,
struct sched_param *);
const struct sched_param *);
extern struct task_struct *idle_task(int cpu);
extern struct task_struct *curr_task(int cpu);
extern void set_curr_task(int cpu, struct task_struct *p);
......
......@@ -794,6 +794,19 @@ config NET_NS
endif # NAMESPACES
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
select CGROUPS
select CGROUP_SCHED
select FAIR_GROUP_SCHED
help
This option optimizes the scheduler for common desktop workloads by
automatically creating and populating task groups. This separation
of workloads isolates aggressive CPU burners (like build jobs) from
desktop applications. Task group autogeneration is currently based
upon task session.
config MM_OWNER
bool
......
......@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu)
}
struct take_cpu_down_param {
struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
......@@ -198,7 +197,6 @@ struct take_cpu_down_param {
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
......@@ -208,11 +206,6 @@ static int __ref take_cpu_down(void *_param)
cpu_notify(CPU_DYING | param->mod, param->hcpu);
if (task_cpu(param->caller) == cpu)
move_task_off_dead_cpu(cpu, param->caller);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
return 0;
}
......@@ -223,7 +216,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.caller = current,
.mod = mod,
.hcpu = hcpu,
};
......@@ -253,9 +245,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
}
BUG_ON(cpu_online(cpu));
/* Wait for it to sleep (leaving idle task). */
/*
* The migration_call() CPU_DYING callback will have removed all
* runnable tasks from the cpu, there's only the idle task left now
* that the migration thread is done doing the stop_machine thing.
*
* Wait for the stop thread to go away.
*/
while (!idle_cpu(cpu))
yield();
cpu_relax();
/* This actually kills the CPU. */
__cpu_die(cpu);
......
......@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)
static inline void put_signal_struct(struct signal_struct *sig)
{
if (atomic_dec_and_test(&sig->sigcnt))
if (atomic_dec_and_test(&sig->sigcnt)) {
sched_autogroup_exit(sig);
free_signal_struct(sig);
}
}
void __put_task_struct(struct task_struct *tsk)
......@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
posix_cpu_timers_init_group(sig);
tty_audit_fork(sig);
sched_autogroup_fork(sig);
sig->oom_adj = current->signal->oom_adj;
sig->oom_score_adj = current->signal->oom_score_adj;
......@@ -1315,7 +1318,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
bad_fork_cleanup_signal:
if (!(clone_flags & CLONE_THREAD))
free_signal_struct(p->signal);
put_signal_struct(p->signal);
bad_fork_cleanup_sighand:
__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
......
......@@ -577,7 +577,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
*/
static int irq_thread(void *data)
{
struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
static struct sched_param param = {
.sched_priority = MAX_USER_RT_PRIO/2,
};
struct irqaction *action = data;
struct irq_desc *desc = irq_to_desc(action->irq);
int wake, oneshot = desc->status & IRQ_ONESHOT;
......
......@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
wait_for_completion(&create.done);
if (!IS_ERR(create.result)) {
struct sched_param param = { .sched_priority = 0 };
static struct sched_param param = { .sched_priority = 0 };
va_list args;
va_start(args, namefmt);
......
......@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax();
arch_mutex_cpu_relax();
}
#endif
spin_lock_mutex(&lock->wait_lock, flags);
......
......@@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending);
void printk_tick(void)
{
if (__get_cpu_var(printk_pending)) {
__get_cpu_var(printk_pending) = 0;
if (__this_cpu_read(printk_pending)) {
__this_cpu_write(printk_pending, 0);
wake_up_interruptible(&log_wait);
}
}
int printk_needs_cpu(int cpu)
{
if (unlikely(cpu_is_offline(cpu)))
if (cpu_is_offline(cpu))
printk_tick();
return per_cpu(printk_pending, cpu);
return __this_cpu_read(printk_pending);
}
void wake_up_klogd(void)
......
此差异已折叠。
#ifdef CONFIG_SCHED_AUTOGROUP
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/utsname.h>
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;
static void autogroup_init(struct task_struct *init_task)
{
autogroup_default.tg = &init_task_group;
init_task_group.autogroup = &autogroup_default;
kref_init(&autogroup_default.kref);
init_rwsem(&autogroup_default.lock);
init_task->signal->autogroup = &autogroup_default;
}
static inline void autogroup_free(struct task_group *tg)
{
kfree(tg->autogroup);
}
static inline void autogroup_destroy(struct kref *kref)
{
struct autogroup *ag = container_of(kref, struct autogroup, kref);
sched_destroy_group(ag->tg);
}
static inline void autogroup_kref_put(struct autogroup *ag)
{
kref_put(&ag->kref, autogroup_destroy);
}
static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
{
kref_get(&ag->kref);
return ag;
}
static inline struct autogroup *autogroup_task_get(struct task_struct *p)
{
struct autogroup *ag;
unsigned long flags;
if (!lock_task_sighand(p, &flags))
return autogroup_kref_get(&autogroup_default);
ag = autogroup_kref_get(p->signal->autogroup);
unlock_task_sighand(p, &flags);
return ag;
}
static inline struct autogroup *autogroup_create(void)
{
struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
struct task_group *tg;
if (!ag)
goto out_fail;
tg = sched_create_group(&init_task_group);
if (IS_ERR(tg))
goto out_free;
kref_init(&ag->kref);
init_rwsem(&ag->lock);
ag->id = atomic_inc_return(&autogroup_seq_nr);
ag->tg = tg;
tg->autogroup = ag;
return ag;
out_free:
kfree(ag);
out_fail:
if (printk_ratelimit()) {
printk(KERN_WARNING "autogroup_create: %s failure.\n",
ag ? "sched_create_group()" : "kmalloc()");
}
return autogroup_kref_get(&autogroup_default);
}
static inline bool
task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
if (tg != &root_task_group)
return false;
if (p->sched_class != &fair_sched_class)
return false;
/*
* We can only assume the task group can't go away on us if
* autogroup_move_group() can see us on ->thread_group list.
*/
if (p->flags & PF_EXITING)
return false;
return true;
}
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
if (enabled && task_wants_autogroup(p, tg))
return p->signal->autogroup->tg;
return tg;
}
static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{
struct autogroup *prev;
struct task_struct *t;
unsigned long flags;
BUG_ON(!lock_task_sighand(p, &flags));
prev = p->signal->autogroup;
if (prev == ag) {
unlock_task_sighand(p, &flags);
return;
}
p->signal->autogroup = autogroup_kref_get(ag);
t = p;
do {
sched_move_task(t);
} while_each_thread(p, t);
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}
/* Allocates GFP_KERNEL, cannot be called under any spinlock */
void sched_autogroup_create_attach(struct task_struct *p)
{
struct autogroup *ag = autogroup_create();
autogroup_move_group(p, ag);
/* drop extra refrence added by autogroup_create() */
autogroup_kref_put(ag);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);
/* Cannot be called under siglock. Currently has no users */
void sched_autogroup_detach(struct task_struct *p)
{
autogroup_move_group(p, &autogroup_default);
}
EXPORT_SYMBOL(sched_autogroup_detach);
void sched_autogroup_fork(struct signal_struct *sig)
{
sig->autogroup = autogroup_task_get(current);
}
void sched_autogroup_exit(struct signal_struct *sig)
{
autogroup_kref_put(sig->autogroup);
}
static int __init setup_autogroup(char *str)
{
sysctl_sched_autogroup_enabled = 0;
return 1;
}
__setup("noautogroup", setup_autogroup);
#ifdef CONFIG_PROC_FS
int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
{
static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag;
int err;
if (*nice < -20 || *nice > 19)
return -EINVAL;
err = security_task_setnice(current, *nice);
if (err)
return err;
if (*nice < 0 && !can_nice(current, *nice))
return -EPERM;
/* this is a heavy operation taking global locks.. */
if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
return -EAGAIN;
next = HZ / 10 + jiffies;
ag = autogroup_task_get(p);
down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
if (!err)
ag->nice = *nice;
up_write(&ag->lock);
autogroup_kref_put(ag);
return err;
}
void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
struct autogroup *ag = autogroup_task_get(p);
down_read(&ag->lock);
seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
up_read(&ag->lock);
autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
}
#endif /* CONFIG_SCHED_DEBUG */
#endif /* CONFIG_SCHED_AUTOGROUP */
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup {
struct kref kref;
struct task_group *tg;
struct rw_semaphore lock;
unsigned long id;
int nice;
};
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg);
#else /* !CONFIG_SCHED_AUTOGROUP */
static inline void autogroup_init(struct task_struct *init_task) { }
static inline void autogroup_free(struct task_group *tg) { }
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
return tg;
}
#ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
return 0;
}
#endif
#endif /* CONFIG_SCHED_AUTOGROUP */
......@@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
}
EXPORT_SYMBOL_GPL(sched_clock);
static __read_mostly int sched_clock_running;
__read_mostly int sched_clock_running;
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__read_mostly int sched_clock_stable;
......
......@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec)
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
#ifdef CONFIG_FAIR_GROUP_SCHED
static void print_cfs_group_stats(struct seq_file *m, int cpu,
struct task_group *tg)
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
struct sched_entity *se = tg->se[cpu];
if (!se)
......@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
#endif
#ifdef CONFIG_CGROUP_SCHED
{
char path[64];
rcu_read_lock();
cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
rcu_read_unlock();
SEQ_printf(m, " %s", path);
}
#endif
SEQ_printf(m, "\n");
}
......@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
read_unlock_irqrestore(&tasklist_lock, flags);
}
#if defined(CONFIG_CGROUP_SCHED) && \
(defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
static void task_group_path(struct task_group *tg, char *buf, int buflen)
{
/* may be NULL if the underlying cgroup isn't fully-created yet */
if (!tg->css.cgroup) {
buf[0] = '\0';
return;
}
cgroup_path(tg->css.cgroup, buf, buflen);
}
#endif
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
......@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
struct sched_entity *last;
unsigned long flags;
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
char path[128];
struct task_group *tg = cfs_rq->tg;
task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
#else
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
#endif
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
SPLIT_NS(cfs_rq->exec_clock));
......@@ -202,32 +169,29 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
spread0 = min_vruntime - rq0_min_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg",
SPLIT_NS(cfs_rq->load_avg));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period",
SPLIT_NS(cfs_rq->load_period));
SEQ_printf(m, " .%-30s: %ld\n", "load_contrib",
cfs_rq->load_contribution);
SEQ_printf(m, " .%-30s: %d\n", "load_tg",
atomic_read(&cfs_rq->tg->load_weight));
#endif
print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
char path[128];
struct task_group *tg = rt_rq->tg;
task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
#else
SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
#endif
#define P(x) \
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
......@@ -243,6 +207,8 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
#undef P
}
extern __read_mostly int sched_clock_running;
static void print_cpu(struct seq_file *m, int cpu)
{
struct rq *rq = cpu_rq(cpu);
......@@ -314,21 +280,42 @@ static const char *sched_tunable_scaling_names[] = {
static int sched_debug_show(struct seq_file *m, void *v)
{
u64 now = ktime_to_ns(ktime_get());
u64 ktime, sched_clk, cpu_clk;
unsigned long flags;
int cpu;
SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
local_irq_save(flags);
ktime = ktime_to_ns(ktime_get());
sched_clk = sched_clock();
cpu_clk = local_clock();
local_irq_restore(flags);
SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now));
#define P(x) \
SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
PN(ktime);
PN(sched_clk);
PN(cpu_clk);
P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
P(sched_clock_stable);
#endif
#undef PN
#undef P
SEQ_printf(m, "\n");
SEQ_printf(m, "sysctl_sched\n");
#define P(x) \
SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
P(jiffies);
PN(sysctl_sched_latency);
PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
......
......@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
/*
* The exponential sliding window over which load is averaged for shares
* distribution.
* (default: 10msec)
*/
unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
static const struct sched_class fair_sched_class;
/**************************************************************
......@@ -143,6 +150,36 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
return cfs_rq->tg->cfs_rq[this_cpu];
}
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (!cfs_rq->on_list) {
/*
* Ensure we either appear before our parent (if already
* enqueued) or force our parent to appear after us when it is
* enqueued. The fact that we always enqueue bottom-up
* reduces this to two cases.
*/
if (cfs_rq->tg->parent &&
cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
&rq_of(cfs_rq)->leaf_cfs_rq_list);
} else {
list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
&rq_of(cfs_rq)->leaf_cfs_rq_list);
}
cfs_rq->on_list = 1;
}
}
static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (cfs_rq->on_list) {
list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
cfs_rq->on_list = 0;
}
}
/* Iterate thr' all leaf cfs_rq's on a runqueue */
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
......@@ -246,6 +283,14 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
return &cpu_rq(this_cpu)->cfs;
}
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}
static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
......@@ -417,7 +462,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
WRT_SYSCTL(sched_min_granularity);
WRT_SYSCTL(sched_latency);
WRT_SYSCTL(sched_wakeup_granularity);
WRT_SYSCTL(sched_shares_ratelimit);
#undef WRT_SYSCTL
return 0;
......@@ -495,6 +539,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
return calc_delta_fair(sched_slice(cfs_rq, se), se);
}
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
......@@ -514,6 +561,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
curr->vruntime += delta_exec_weighted;
update_min_vruntime(cfs_rq);
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
cfs_rq->load_unacc_exec_time += delta_exec;
#endif
}
static void update_curr(struct cfs_rq *cfs_rq)
......@@ -633,7 +684,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
list_add(&se->group_node, &cfs_rq->tasks);
}
cfs_rq->nr_running++;
se->on_rq = 1;
}
static void
......@@ -647,9 +697,140 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
list_del_init(&se->group_node);
}
cfs_rq->nr_running--;
se->on_rq = 0;
}
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
int global_update)
{
struct task_group *tg = cfs_rq->tg;
long load_avg;
load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
load_avg -= cfs_rq->load_contribution;
if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
atomic_add(load_avg, &tg->load_weight);
cfs_rq->load_contribution += load_avg;
}
}
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
u64 period = sysctl_sched_shares_window;
u64 now, delta;
unsigned long load = cfs_rq->load.weight;
if (!cfs_rq)
return;
now = rq_of(cfs_rq)->clock;
delta = now - cfs_rq->load_stamp;
/* truncate load history at 4 idle periods */
if (cfs_rq->load_stamp > cfs_rq->load_last &&
now - cfs_rq->load_last > 4 * period) {
cfs_rq->load_period = 0;
cfs_rq->load_avg = 0;
}
cfs_rq->load_stamp = now;
cfs_rq->load_unacc_exec_time = 0;
cfs_rq->load_period += delta;
if (load) {
cfs_rq->load_last = now;
cfs_rq->load_avg += delta * load;
}
/* consider updating load contribution on each fold or truncate */
if (global_update || cfs_rq->load_period > period
|| !cfs_rq->load_period)
update_cfs_rq_load_contribution(cfs_rq, global_update);
while (cfs_rq->load_period > period) {
/*
* Inline assembly required to prevent the compiler
* optimising this loop into a divmod call.
* See __iter_div_u64_rem() for another example of this.
*/
asm("" : "+rm" (cfs_rq->load_period));
cfs_rq->load_period /= 2;
cfs_rq->load_avg /= 2;
}
if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
list_del_leaf_cfs_rq(cfs_rq);
}
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
if (se->on_rq) {
/* commit outstanding execution time */
if (cfs_rq->curr == se)
update_curr(cfs_rq);
account_entity_dequeue(cfs_rq, se);
}
update_load_set(&se->load, weight);
if (se->on_rq)
account_entity_enqueue(cfs_rq, se);
}
static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
{
struct task_group *tg;
struct sched_entity *se;
long load_weight, load, shares;
if (!cfs_rq)
return;
tg = cfs_rq->tg;
se = tg->se[cpu_of(rq_of(cfs_rq))];
if (!se)
return;
load = cfs_rq->load.weight + weight_delta;
load_weight = atomic_read(&tg->load_weight);
load_weight -= cfs_rq->load_contribution;
load_weight += load;
shares = (tg->shares * load);
if (load_weight)
shares /= load_weight;
if (shares < MIN_SHARES)
shares = MIN_SHARES;
if (shares > tg->shares)
shares = tg->shares;
reweight_entity(cfs_rq_of(se), se, shares);
}
static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq, 0);
}
}
#else /* CONFIG_FAIR_GROUP_SCHED */
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
{
}
static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHEDSTATS
......@@ -771,6 +952,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq, se->load.weight);
account_entity_enqueue(cfs_rq, se);
if (flags & ENQUEUE_WAKEUP) {
......@@ -782,6 +965,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
check_spread(cfs_rq, se);
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;
if (cfs_rq->nr_running == 1)
list_add_leaf_cfs_rq(cfs_rq);
}
static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
......@@ -825,8 +1012,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
update_cfs_shares(cfs_rq, 0);
/*
* Normalize the entity after updating the min_vruntime because the
......@@ -955,6 +1145,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
*/
update_curr(cfs_rq);
/*
* Update share accounting for long-running entities.
*/
update_entity_shares_tick(cfs_rq);
#ifdef CONFIG_SCHED_HRTICK
/*
* queued ticks are scheduled to match the slice, so don't bother
......@@ -1055,6 +1250,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
flags = ENQUEUE_WAKEUP;
}
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq, 0);
}
hrtick_update(rq);
}
......@@ -1071,12 +1273,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight)
break;
flags |= DEQUEUE_SLEEP;
}
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
update_cfs_shares(cfs_rq, 0);
}
hrtick_update(rq);
}
......@@ -1143,51 +1353,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
* Adding load to a group doesn't make a group heavier, but can cause movement
* of group shares between cpus. Assuming the shares were perfectly aligned one
* can calculate the shift in shares.
*
* The problem is that perfectly aligning the shares is rather expensive, hence
* we try to avoid doing that too often - see update_shares(), which ratelimits
* this change.
*
* We compensate this by not only taking the current delta into account, but
* also considering the delta between when the shares were last adjusted and
* now.
*
* We still saw a performance dip, some tracing learned us that between
* cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
* significantly. Therefore try to bias the error in direction of failing
* the affine wakeup.
*
*/
static long effective_load(struct task_group *tg, int cpu,
long wl, long wg)
static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
{
struct sched_entity *se = tg->se[cpu];
if (!tg->parent)
return wl;
/*
* By not taking the decrease of shares on the other cpu into
* account our error leans towards reducing the affine wakeups.
*/
if (!wl && sched_feat(ASYM_EFF_LOAD))
return wl;
for_each_sched_entity(se) {
long S, rw, s, a, b;
long more_w;
/*
* Instead of using this increment, also add the difference
* between when the shares were last updated and now.
*/
more_w = se->my_q->load.weight - se->my_q->rq_weight;
wl += more_w;
wg += more_w;
S = se->my_q->tg->shares;
s = se->my_q->shares;
rw = se->my_q->rq_weight;
s = se->load.weight;
rw = se->my_q->load.weight;
a = S*(rw + wl);
b = S*rw + s*wg;
......@@ -1508,23 +1687,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
sd = tmp;
}
#ifdef CONFIG_FAIR_GROUP_SCHED
if (sched_feat(LB_SHARES_UPDATE)) {
/*
* Pick the largest domain to update shares over
*/
tmp = sd;
if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
tmp = affine_sd;
if (tmp) {
raw_spin_unlock(&rq->lock);
update_shares(tmp);
raw_spin_lock(&rq->lock);
}
}
#endif
if (affine_sd) {
if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
return select_idle_sibling(p, cpu);
......@@ -1909,6 +2071,48 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
}
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* update tg->load_weight by folding this cpu's load_avg
*/
static int update_shares_cpu(struct task_group *tg, int cpu)
{
struct cfs_rq *cfs_rq;
unsigned long flags;
struct rq *rq;
if (!tg->se[cpu])
return 0;
rq = cpu_rq(cpu);
cfs_rq = tg->cfs_rq[cpu];
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
update_cfs_load(cfs_rq, 1);
/*
* We need to update shares after updating tg->load_weight in
* order to adjust the weight of groups with long running tasks.
*/
update_cfs_shares(cfs_rq, 0);
raw_spin_unlock_irqrestore(&rq->lock, flags);
return 0;
}
static void update_shares(int cpu)
{
struct cfs_rq *cfs_rq;
struct rq *rq = cpu_rq(cpu);
rcu_read_lock();
for_each_leaf_cfs_rq(rq, cfs_rq)
update_shares_cpu(cfs_rq->tg, cpu);
rcu_read_unlock();
}
static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move,
......@@ -1956,6 +2160,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
return max_load_move - rem_load_move;
}
#else
static inline void update_shares(int cpu)
{
}
static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move,
......@@ -3032,7 +3240,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
schedstat_inc(sd, lb_count[idle]);
redo:
update_shares(sd);
group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
cpus, balance);
......@@ -3174,8 +3381,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
else
ld_moved = 0;
out:
if (ld_moved)
update_shares(sd);
return ld_moved;
}
......@@ -3199,6 +3404,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
*/
raw_spin_unlock(&this_rq->lock);
update_shares(this_cpu);
for_each_domain(this_cpu, sd) {
unsigned long interval;
int balance = 1;
......@@ -3569,6 +3775,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
int update_next_balance = 0;
int need_serialize;
update_shares(cpu);
for_each_domain(cpu, sd) {
if (!(sd->flags & SD_LOAD_BALANCE))
continue;
......
......@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
SCHED_FEAT(HRTICK, 0)
SCHED_FEAT(DOUBLE_TICK, 0)
SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(LB_SHARES_UPDATE, 1)
SCHED_FEAT(ASYM_EFF_LOAD, 1)
/*
* Spin-wait on mutex acquisition when the mutex owner is running on
......
......@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}
static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
list_add_rcu(&rt_rq->leaf_rt_rq_list,
&rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
}
static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
list_del_rcu(&rt_rq->leaf_rt_rq_list);
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
......@@ -276,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
return ktime_to_ns(def_rt_bandwidth.rt_period);
}
static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
}
static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
......@@ -825,6 +844,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
return;
if (!rt_rq->rt_nr_running)
list_add_leaf_rt_rq(rt_rq);
if (head)
list_add(&rt_se->run_list, queue);
else
......@@ -844,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
__clear_bit(rt_se_prio(rt_se), array->bitmap);
dec_rt_tasks(rt_se, rt_rq);
if (!rt_rq->rt_nr_running)
list_del_leaf_rt_rq(rt_rq);
}
/*
......
......@@ -853,7 +853,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN: {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
static struct sched_param param = {
.sched_priority = MAX_RT_PRIO-1
};
p = per_cpu(ksoftirqd, hotcpu);
per_cpu(ksoftirqd, hotcpu) = NULL;
......
......@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid)
err = session;
out:
write_unlock_irq(&tasklist_lock);
if (err > 0)
if (err > 0) {
proc_sid_connector(group_leader);
sched_autogroup_create_attach(group_leader);
}
return err;
}
......
......@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
static int min_sched_shares_ratelimit = 100000; /* 100 usec */
static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
#endif
#ifdef CONFIG_COMPACTION
......@@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = {
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
{
.procname = "sched_shares_ratelimit",
.data = &sysctl_sched_shares_ratelimit,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_shares_ratelimit,
.extra2 = &max_sched_shares_ratelimit,
},
{
.procname = "sched_tunable_scaling",
.data = &sysctl_sched_tunable_scaling,
......@@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = {
.extra1 = &min_sched_tunable_scaling,
.extra2 = &max_sched_tunable_scaling,
},
{
.procname = "sched_shares_thresh",
.data = &sysctl_sched_shares_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
{
.procname = "sched_migration_cost",
.data = &sysctl_sched_migration_cost,
......@@ -351,6 +332,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_shares_window",
.data = &sysctl_sched_shares_window,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "timer_migration",
.data = &sysctl_timer_migration,
......@@ -382,6 +370,17 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
.data = &sysctl_sched_autogroup_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
.extra1 = &zero,
.extra2 = &one,
},
#endif
#ifdef CONFIG_PROVE_LOCKING
{
.procname = "prove_locking",
......
......@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
static int trace_wakeup_test_thread(void *data)
{
/* Make this a RT thread, doesn't need to be too high */
struct sched_param param = { .sched_priority = 5 };
static struct sched_param param = { .sched_priority = 5 };
struct completion *x = data;
sched_setscheduler(current, SCHED_FIFO, &param);
......
......@@ -309,7 +309,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
*/
static int watchdog(void *unused)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
sched_setscheduler(current, SCHED_FIFO, &param);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册