提交 22a093b2 编写于 作者: L Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Debug info and other statistics fixes and related enhancements"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/numa: Fix numa balancing stats in /proc/pid/sched
  sched/numa: Show numa_group ID in /proc/sched_debug task listings
  sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h
  sched/stat: Expose /proc/pid/schedstat if CONFIG_SCHED_INFO=y
  sched/stat: Simplify the sched_info accounting dependency
......@@ -491,13 +491,16 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
}
#endif
#ifdef CONFIG_SCHEDSTATS
#ifdef CONFIG_SCHED_INFO
/*
* Provides /proc/PID/schedstat
*/
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
if (unlikely(!sched_info_on()))
seq_printf(m, "0 0 0\n");
else
seq_printf(m, "%llu %llu %lu\n",
(unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
......@@ -2787,7 +2790,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
#ifdef CONFIG_SCHED_INFO
ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
......@@ -3135,7 +3138,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
#ifdef CONFIG_SCHED_INFO
ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
......
......@@ -192,8 +192,6 @@ struct task_group;
#ifdef CONFIG_SCHED_DEBUG
extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
extern void proc_sched_set_task(struct task_struct *p);
extern void
print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#endif
/*
......@@ -838,7 +836,7 @@ extern struct user_struct root_user;
struct backing_dev_info;
struct reclaim_state;
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
#ifdef CONFIG_SCHED_INFO
struct sched_info {
/* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */
......@@ -848,7 +846,7 @@ struct sched_info {
unsigned long long last_arrival,/* when we last ran on a cpu */
last_queued; /* when we were last queued to run */
};
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
#endif /* CONFIG_SCHED_INFO */
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
......@@ -1397,7 +1395,7 @@ struct task_struct {
int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
#ifdef CONFIG_SCHED_INFO
struct sched_info sched_info;
#endif
......
......@@ -435,6 +435,7 @@ config TASKSTATS
config TASK_DELAY_ACCT
bool "Enable per-task delay accounting"
depends on TASKSTATS
select SCHED_INFO
help
Collect information on time spent by a task waiting for system
resources like cpu, synchronous block I/O completion and swapping
......
......@@ -2164,7 +2164,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
set_task_cpu(p, cpu);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
#ifdef CONFIG_SCHED_INFO
if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
......
......@@ -142,7 +142,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
0LL, 0L);
#endif
#ifdef CONFIG_NUMA_BALANCING
SEQ_printf(m, " %d", task_node(p));
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif
#ifdef CONFIG_CGROUP_SCHED
SEQ_printf(m, " %s", task_group_path(task_group(p)));
......@@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs);
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
#ifdef CONFIG_NUMA_BALANCING
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
SEQ_printf(m, "numa_faults node=%d ", node);
SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf);
SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf);
}
#endif
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
struct mempolicy *pol;
int node, i;
if (p->mm)
P(mm->numa_scan_seq);
......@@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
mpol_get(pol);
task_unlock(p);
SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));
for_each_online_node(node) {
for (i = 0; i < 2; i++) {
unsigned long nr_faults = -1;
int cpu_current, home_node;
if (p->numa_faults)
nr_faults = p->numa_faults[2*node + i];
cpu_current = !i ? (task_node(p) == node) :
(pol && node_isset(node, pol->v.nodes));
home_node = (p->numa_preferred_nid == node);
SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
i, node, cpu_current, home_node, nr_faults);
}
}
P(numa_pages_migrated);
P(numa_preferred_nid);
P(total_numa_faults);
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
task_node(p), task_numa_group_id(p));
show_numa_stats(p, m);
mpol_put(pol);
#endif
}
......
......@@ -8473,7 +8473,27 @@ void print_cfs_stats(struct seq_file *m, int cpu)
print_cfs_rq(m, cpu, cfs_rq);
rcu_read_unlock();
}
#endif
#ifdef CONFIG_NUMA_BALANCING
void show_numa_stats(struct task_struct *p, struct seq_file *m)
{
int node;
unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
for_each_online_node(node) {
if (p->numa_faults) {
tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
}
if (p->numa_group) {
gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
}
print_numa_stats(m, node, tsf, tpf, gsf, gpf);
}
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
__init void init_sched_fair_class(void)
{
......
......@@ -1689,9 +1689,22 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
#ifdef CONFIG_SCHED_DEBUG
extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void
print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf);
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
......
......@@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
# define schedstat_set(var, val) do { } while (0)
#endif
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
#ifdef CONFIG_SCHED_INFO
static inline void sched_info_reset_dequeued(struct task_struct *t)
{
t->sched_info.last_queued = 0;
......@@ -156,7 +156,7 @@ sched_info_switch(struct rq *rq,
#define sched_info_depart(rq, t) do { } while (0)
#define sched_info_arrive(rq, next) do { } while (0)
#define sched_info_switch(rq, t, next) do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
#endif /* CONFIG_SCHED_INFO */
/*
* The following are functions that support scheduler-internal time accounting.
......
......@@ -841,9 +841,14 @@ config SCHED_DEBUG
that can help debug the scheduler. The runtime overhead of this
option is minimal.
config SCHED_INFO
bool
default n
config SCHEDSTATS
bool "Collect scheduler statistics"
depends on DEBUG_KERNEL && PROC_FS
select SCHED_INFO
help
If you say Y here, additional code will be inserted into the
scheduler and related routines to collect statistics about
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册