sched_stats.h 7.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11

#ifdef CONFIG_SCHEDSTATS
/*
 * bump this up when changing the output format or the meaning of an existing
 * format, so that tools can adapt (or abort)
 */
#define SCHEDSTAT_VERSION 14

static int show_schedstat(struct seq_file *seq, void *v)
{
	int cpu;
12 13 14 15 16
	int mask_len = NR_CPUS/32 * 9;
	char *mask_str = kmalloc(mask_len, GFP_KERNEL);

	if (mask_str == NULL)
		return -ENOMEM;
17 18 19 20 21 22 23

	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
	seq_printf(seq, "timestamp %lu\n", jiffies);
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
		struct sched_domain *sd;
24
		int dcount = 0;
25 26 27 28
#endif

		/* runqueue-specific stats */
		seq_printf(seq,
29
		    "cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
30
		    cpu, rq->yld_both_empty,
31 32 33
		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
		    rq->ttwu_count, rq->ttwu_local,
34
		    rq->rq_sched_info.cpu_time,
35
		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
36 37 38 39 40 41 42 43 44

		seq_printf(seq, "\n");

#ifdef CONFIG_SMP
		/* domain-specific stats */
		preempt_disable();
		for_each_domain(cpu, sd) {
			enum cpu_idle_type itype;

45
			cpumask_scnprintf(mask_str, mask_len, sd->span);
46
			seq_printf(seq, "domain%d %s", dcount++, mask_str);
47 48
			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
					itype++) {
49
				seq_printf(seq, " %u %u %u %u %u %u %u %u",
50
				    sd->lb_count[itype],
51 52 53 54 55 56 57 58
				    sd->lb_balanced[itype],
				    sd->lb_failed[itype],
				    sd->lb_imbalance[itype],
				    sd->lb_gained[itype],
				    sd->lb_hot_gained[itype],
				    sd->lb_nobusyq[itype],
				    sd->lb_nobusyg[itype]);
			}
I
Ingo Molnar 已提交
59 60
			seq_printf(seq,
				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
61 62 63
			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
64 65 66 67 68 69
			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
			    sd->ttwu_move_balance);
		}
		preempt_enable();
#endif
	}
A
Adrian Bunk 已提交
70
	kfree(mask_str);
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
	return 0;
}

static int schedstat_open(struct inode *inode, struct file *file)
{
	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
	char *buf = kmalloc(size, GFP_KERNEL);
	struct seq_file *m;
	int res;

	if (!buf)
		return -ENOMEM;
	res = single_open(file, show_schedstat, NULL);
	if (!res) {
		m = file->private_data;
		m->buf = buf;
		m->size = size;
	} else
		kfree(buf);
	return res;
}

const struct file_operations proc_schedstat_operations = {
	.open    = schedstat_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
	if (rq) {
		rq->rq_sched_info.run_delay += delta;
108
		rq->rq_sched_info.pcount++;
109 110 111 112 113 114 115 116 117 118 119 120 121 122
	}
}

/*
 * Expects runqueue lock to be held for atomicity of update
 */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
	if (rq)
		rq->rq_sched_info.cpu_time += delta;
}
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
123
# define schedstat_set(var, val)	do { var = (val); } while (0)
124 125 126 127 128 129 130 131 132
#else /* !CONFIG_SCHEDSTATS */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{}
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{}
# define schedstat_inc(rq, field)	do { } while (0)
# define schedstat_add(rq, field, amt)	do { } while (0)
133
# define schedstat_set(var, val)	do { } while (0)
134 135
#endif

136
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
/*
 * Called when a process is dequeued from the active array and given
 * the cpu.  We should note that with the exception of interactive
 * tasks, the expired queue will become the active queue after the active
 * queue is empty, without explicitly dequeuing and requeuing tasks in the
 * expired queue.  (Interactive tasks may be requeued directly to the
 * active queue, thus delaying tasks in the expired queue from running;
 * see scheduler_tick()).
 *
 * This function is only called from sched_info_arrive(), rather than
 * dequeue_task(). Even though a task may be queued and dequeued multiple
 * times as it is shuffled about, we're really interested in knowing how
 * long it was from the *first* time it was queued to the time that it
 * finally hit a cpu.
 */
static inline void sched_info_dequeued(struct task_struct *t)
{
	t->sched_info.last_queued = 0;
}

/*
 * Called when a task finally hits the cpu.  We can now calculate how
 * long it was waiting to run.  We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct task_struct *t)
{
164
	unsigned long long now = task_rq(t)->clock, delta = 0;
165 166 167 168 169 170

	if (t->sched_info.last_queued)
		delta = now - t->sched_info.last_queued;
	sched_info_dequeued(t);
	t->sched_info.run_delay += delta;
	t->sched_info.last_arrival = now;
171
	t->sched_info.pcount++;
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194

	rq_sched_info_arrive(task_rq(t), delta);
}

/*
 * Called when a process is queued into either the active or expired
 * array.  The time is noted and later used to determine how long we
 * had to wait for us to reach the cpu.  Since the expired queue will
 * become the active queue after active queue is empty, without dequeuing
 * and requeuing any tasks, we are interested in queuing to either. It
 * is unusual but not impossible for tasks to be dequeued and immediately
 * requeued in the same or another array: this can happen in sched_yield(),
 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
 * to runqueue.
 *
 * This function is only called from enqueue_task(), but also only updates
 * the timestamp if it is already not set.  It's assumed that
 * sched_info_dequeued() will clear that stamp when appropriate.
 */
static inline void sched_info_queued(struct task_struct *t)
{
	if (unlikely(sched_info_on()))
		if (!t->sched_info.last_queued)
195
			t->sched_info.last_queued = task_rq(t)->clock;
196 197 198 199 200 201 202 203
}

/*
 * Called when a process ceases being the active-running process, either
 * voluntarily or involuntarily.  Now we can calculate how long we ran.
 */
static inline void sched_info_depart(struct task_struct *t)
{
204 205
	unsigned long long delta = task_rq(t)->clock -
					t->sched_info.last_arrival;
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

	t->sched_info.cpu_time += delta;
	rq_sched_info_depart(task_rq(t), delta);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice.  (This may also be called when switching to or from
 * the idle task.)  We are only called when prev != next.
 */
static inline void
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	struct rq *rq = task_rq(prev);

	/*
	 * prev now departs the cpu.  It's not interesting to record
	 * stats about how efficient we were at scheduling the idle
	 * process, however.
	 */
	if (prev != rq->idle)
		sched_info_depart(prev);

	if (next != rq->idle)
		sched_info_arrive(next);
}
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	if (unlikely(sched_info_on()))
		__sched_info_switch(prev, next);
}
#else
#define sched_info_queued(t)		do { } while (0)
#define sched_info_switch(t, next)	do { } while (0)
241
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
242