// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};

18 19 20
static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
21 22 23
};

struct cpuacct_usage {
24
	u64	usages[CPUACCT_STAT_NSTATS];
25 26
};

27
/* track CPU usage of a group of tasks and its child groups */
L
Li Zefan 已提交
28
struct cpuacct {
29 30 31 32
	struct cgroup_subsys_state	css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	struct cpuacct_usage __percpu	*cpuusage;
	struct kernel_cpustat __percpu	*cpustat;
L
Li Zefan 已提交
33 34
};

35 36 37 38 39
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

40
/* Return CPU accounting group to which this task belongs */
L
Li Zefan 已提交
41 42
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
43
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
L
Li Zefan 已提交
44 45 46 47
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
T
Tejun Heo 已提交
48
	return css_ca(ca->css.parent);
L
Li Zefan 已提交
49 50
}

51
static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
52 53 54 55
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};
56

57
/* Create a new CPU accounting group */
58 59
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
60 61 62
{
	struct cpuacct *ca;

63
	if (!parent_css)
64 65 66 67 68 69
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

70
	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

88
/* Destroy an existing CPU accounting group */
89
static void cpuacct_css_free(struct cgroup_subsys_state *css)
90
{
91
	struct cpuacct *ca = css_ca(css);
92 93 94 95 96 97

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

98
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
99
				 enum cpuacct_stat_index index)
100
{
101
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
102 103
	u64 data;

104
	/*
105
	 * We allow index == CPUACCT_STAT_NSTATS here to read
106 107
	 * the sum of suages.
	 */
108
	BUG_ON(index > CPUACCT_STAT_NSTATS);
109

110 111 112 113 114
#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
115 116
#endif

117
	if (index == CPUACCT_STAT_NSTATS) {
118 119 120
		int i = 0;

		data = 0;
121
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
122 123 124 125 126 127
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
128 129 130 131 132 133 134 135
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

/* Set every usage counter of @ca on @cpu to @val (used for reset-to-0) */
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

154
/* Return total CPU usage (in nanoseconds) of a group */
155
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
156
			   enum cpuacct_stat_index index)
157
{
158
	struct cpuacct *ca = css_ca(css);
159 160 161
	u64 totalcpuusage = 0;
	int i;

162
	for_each_possible_cpu(i)
163
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
164 165 166 167

	return totalcpuusage;
}

168 169 170
static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
171
	return __cpuusage_read(css, CPUACCT_STAT_USER);
172 173 174 175 176
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
177
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
178 179 180 181
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
182
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
183 184
}

185
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
186
			  u64 val)
187
{
188
	struct cpuacct *ca = css_ca(css);
189
	int cpu;
190

191 192 193
	/*
	 * Only allow '0' here to do a reset.
	 */
194 195
	if (val)
		return -EINVAL;
196

197 198
	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);
199

200
	return 0;
201 202
}

203
static int __cpuacct_percpu_seq_show(struct seq_file *m,
204
				     enum cpuacct_stat_index index)
205
{
206
	struct cpuacct *ca = css_ca(seq_css(m));
207 208 209
	u64 percpu;
	int i;

210
	for_each_possible_cpu(i) {
211
		percpu = cpuacct_cpuusage_read(ca, i, index);
212 213 214 215 216 217
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

218 219
static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
220
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
221 222 223 224
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
225
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
226 227 228 229
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
230
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
231 232
}

233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}

269
static int cpuacct_stats_show(struct seq_file *sf, void *v)
270
{
271
	struct cpuacct *ca = css_ca(seq_css(sf));
272
	s64 val[CPUACCT_STAT_NSTATS];
273
	int cpu;
274
	int stat;
275

276
	memset(val, 0, sizeof(val));
277
	for_each_possible_cpu(cpu) {
278
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
279

280 281 282 283 284
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
285 286
	}

287 288 289
	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
290
			   (long long)nsec_to_clock_t(val[stat]));
291
	}
292 293 294 295 296 297 298 299 300 301

	return 0;
}

static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
302 303 304 305 306 307 308 309
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
310 311
	{
		.name = "usage_percpu",
312
		.seq_show = cpuacct_percpu_seq_show,
313
	},
314 315 316 317 318 319 320 321
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
322 323 324 325
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
326 327
	{
		.name = "stat",
328
		.seq_show = cpuacct_stats_show,
329 330 331 332 333 334 335 336 337 338 339 340
	},
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = task_pt_regs(tsk);

	/* Classify by the mode the task will return to */
	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

	/* charge the task's group and every ancestor up to the root */
	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;

	rcu_read_unlock();
}

355 356 357 358 359
/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
360
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
361 362 363 364
{
	struct cpuacct *ca;

	rcu_read_lock();
365 366
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
367 368 369
	rcu_read_unlock();
}

370
struct cgroup_subsys cpuacct_cgrp_subsys = {
371 372
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
373
	.legacy_cftypes	= files,
374
	.early_init	= true,
375
};

#ifdef CONFIG_PSI

/* "psi_v1=" boot parameter: opt in to PSI files on cgroup v1 */
static bool psi_v1_enable;
static int __init setup_psi_v1(char *str)
{
	/* __setup handlers return 1 when the argument was consumed */
	return kstrtobool(str, &psi_v1_enable) == 0;
}
__setup("psi_v1=", setup_psi_v1);

386 387
static int __init cgroup_v1_psi_init(void)
{
388 389 390 391 392
	if (!psi_v1_enable) {
		static_branch_enable(&psi_v1_disabled);
		return 0;
	}

393 394 395 396 397 398
	cgroup_add_legacy_cftypes(&cpuacct_cgrp_subsys, cgroup_v1_psi_files);
	return 0;
}

late_initcall_sync(cgroup_v1_psi_init);
#endif