#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <linux/kernel_stat.h>
#include <linux/err.h>

#include <asm/irq_regs.h>

#include "sched.h"

/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */

/*
 * Execution-mode buckets that a cpu accounting group keeps time for.
 * The values are used directly as array subscripts.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time spent executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent executing in kernel mode */

	/* number of buckets above; keep this entry last */
	CPUACCT_STAT_NSTATS,
};

/* Name printed for each accounting bucket, indexed by cpuacct_stat_index. */
static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

struct cpuacct_usage {
34
	u64	usages[CPUACCT_STAT_NSTATS];
35 36
};

/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state css;
	/* cpuusage holds pointer to a u64-type object on every cpu */
41
	struct cpuacct_usage __percpu *cpuusage;
L
Li Zefan 已提交
42 43 44
	struct kernel_cpustat __percpu *cpustat;
};

/*
 * Map a cgroup_subsys_state to the cpuacct that embeds it.
 * A NULL @css yields NULL.
 */
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	if (!css)
		return NULL;

	return container_of(css, struct cpuacct, css);
}

/* return cpu accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
53
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
L
Li Zefan 已提交
54 55 56 57
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
T
Tejun Heo 已提交
58
	return css_ca(ca->css.parent);
L
Li Zefan 已提交
59 60
}

static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
62 63 64 65
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};

/* create a new cpu accounting group */
68 69
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
70 71 72
{
	struct cpuacct *ca;

73
	if (!parent_css)
74 75 76 77 78 79
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

80
	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* destroy an existing cpu accounting group */
99
static void cpuacct_css_free(struct cgroup_subsys_state *css)
100
{
101
	struct cpuacct *ca = css_ca(css);
102 103 104 105 106 107

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
109
				 enum cpuacct_stat_index index)
110
{
111
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
112 113
	u64 data;

114
	/*
115
	 * We allow index == CPUACCT_STAT_NSTATS here to read
116 117
	 * the sum of suages.
	 */
118
	BUG_ON(index > CPUACCT_STAT_NSTATS);
119

120 121 122 123 124
#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
125 126
#endif

127
	if (index == CPUACCT_STAT_NSTATS) {
128 129 130
		int i = 0;

		data = 0;
131
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
132 133 134 135 136 137
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
138 139 140 141 142 143 144 145
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

/* Set every usage counter of @ca on @cpu to @val. */
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

/* return total cpu usage (in nanoseconds) of a group */
165
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
166
			   enum cpuacct_stat_index index)
167
{
168
	struct cpuacct *ca = css_ca(css);
169 170 171
	u64 totalcpuusage = 0;
	int i;

172
	for_each_possible_cpu(i)
173
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
174 175 176 177

	return totalcpuusage;
}

static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
181
	return __cpuusage_read(css, CPUACCT_STAT_USER);
182 183 184 185 186
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
187
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
188 189 190 191
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
192
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
193 194
}

static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
196
			  u64 val)
197
{
198
	struct cpuacct *ca = css_ca(css);
199
	int cpu;
200

201 202 203
	/*
	 * Only allow '0' here to do a reset.
	 */
204 205
	if (val)
		return -EINVAL;
206

207 208
	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);
209

210
	return 0;
211 212
}

static int __cpuacct_percpu_seq_show(struct seq_file *m,
214
				     enum cpuacct_stat_index index)
215
{
216
	struct cpuacct *ca = css_ca(seq_css(m));
217 218 219
	u64 percpu;
	int i;

220
	for_each_possible_cpu(i) {
221
		percpu = cpuacct_cpuusage_read(ca, i, index);
222 223 224 225 226 227
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
230
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
231 232 233 234
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
235
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
236 237 238 239
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
240
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
241 242
}

/*
 * seq_show handler of "usage_all".
 *
 * Prints a header line ("cpu" followed by the name of every counter) and
 * then one line per possible cpu holding the cpu number and each counter.
 */
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
		u64 snapshot[CPUACCT_STAT_NSTATS];

		/*
		 * Copy the counters first and format them afterwards, so that
		 * no output formatting runs under rq->lock and all counters
		 * of one cpu are read under a single lock section.
		 */
#ifndef CONFIG_64BIT
		/*
		 * Take rq->lock to make 64-bit read safe on 32-bit
		 * platforms.
		 */
		raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
			snapshot[index] = cpuusage->usages[index];

#ifndef CONFIG_64BIT
		raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

		seq_printf(m, "%d", cpu);
		for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
			seq_printf(m, " %llu",
				   (unsigned long long) snapshot[index]);
		seq_puts(m, "\n");
	}
	return 0;
}

static int cpuacct_stats_show(struct seq_file *sf, void *v)
280
{
281
	struct cpuacct *ca = css_ca(seq_css(sf));
282
	s64 val[CPUACCT_STAT_NSTATS];
283
	int cpu;
284
	int stat;
285

286
	memset(val, 0, sizeof(val));
287
	for_each_possible_cpu(cpu) {
288
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
289

290 291 292 293 294
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
295 296
	}

297 298 299 300 301
	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   cputime64_to_clock_t(val[stat]));
	}
302 303 304 305 306 307 308 309 310 311

	return 0;
}

static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
312 313 314 315 316 317 318 319
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
320 321
	{
		.name = "usage_percpu",
322
		.seq_show = cpuacct_percpu_seq_show,
323
	},
324 325 326 327 328 329 330 331
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
332 333 334 335
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
336 337
	{
		.name = "stat",
338
		.seq_show = cpuacct_stats_show,
339 340 341 342 343 344 345 346 347 348 349 350
	},
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * @cputime is added to the user or system counter of the task's group and
 * of every ancestor group, on the current cpu.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs;

	/*
	 * Prefer the registers of the context an interrupt cut into.
	 * task_pt_regs() describes how the task entered the kernel, which
	 * for a user task is always user mode, so relying on it alone would
	 * never charge system time to user tasks.
	 */
	regs = get_irq_regs();
	if (!regs)
		regs = task_pt_regs(tsk);

	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
370
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
371 372 373 374
{
	struct cpuacct *ca;

	rcu_read_lock();
375 376
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
377 378 379
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
381 382
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
383
	.legacy_cftypes	= files,
384
	.early_init	= true,
385
};