// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

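/*
 * Per-CPU usage counters of a group, plus the cputime_adjust() state
 * used to split them into user/nice/system time for cpuacct.proc_stat.
 */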
struct cpuacct_usage {
	u64	usages[CPUACCT_STAT_NSTATS];
	struct prev_cputime prev_cputime1; /* utime and stime */
	struct prev_cputime prev_cputime2; /* user and nice */
} ____cacheline_aligned;

/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state	css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	struct cpuacct_usage __percpu	*cpuusage;
	struct kernel_cpustat __percpu	*cpustat;

	ALI_HOTFIX_RESERVE(1)
	ALI_HOTFIX_RESERVE(2)
	ALI_HOTFIX_RESERVE(3)
	ALI_HOTFIX_RESERVE(4)
};

static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

/* Return CPU accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	return css_ca(ca->css.parent);
}

static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};

/* Create a new CPU accounting group */
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cpuacct *ca;
	int i;

	if (!parent_css)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	for_each_possible_cpu(i) {
		prev_cputime_init(&per_cpu_ptr(ca->cpuusage, i)->prev_cputime1);
		prev_cputime_init(&per_cpu_ptr(ca->cpuusage, i)->prev_cputime2);
	}

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
	struct cpuacct *ca = css_ca(css);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

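/*
 * Read the usage counter @index of @ca on @cpu; @index may also be
 * CPUACCT_STAT_NSTATS to request the sum of all counters.
 */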
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_stat_index index)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

	/*
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of usages.
	 */
	BUG_ON(index > CPUACCT_STAT_NSTATS);

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

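/*
 * Overwrite every usage counter of @ca on @cpu with @val.  The only
 * caller, cpuusage_write(), passes 0 to reset the counters.
 */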
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

/* Return total CPU usage (in nanoseconds) of a group */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
	int i;

	for_each_possible_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

	return totalcpuusage;
}

static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}

static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	/*
	 * Only allow '0' here to do a reset.
	 */
	if (val)
		return -EINVAL;

	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);

	return 0;
}

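/* Print one usage value per possible CPU, space separated */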
static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
	int i;

	for_each_possible_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i, index);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

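/*
 * Print a header row of stat names, then one line per possible CPU
 * with that CPU's per-stat usage in nanoseconds.
 */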
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}

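/*
 * Show cumulative user and system time in clock ticks; nice time is
 * folded into user, irq/softirq time into system.
 */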
static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	s64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	int stat;

	memset(val, 0, sizeof(val));
	for_each_possible_cpu(cpu) {
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
	}

	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   (long long)nsec_to_clock_t(val[stat]));
	}

	return 0;
}

#ifdef CONFIG_SCHED_SLI
#ifndef arch_idle_time
#define arch_idle_time(cpu) 0
#endif

static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
{
	return container_of(global_cgroup_css(cgrp, cpu_cgrp_id),
				struct task_group, css);
}

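/*
 * Split the raw counters of @ca on @cpu into /proc/stat style fields.
 * cputime_adjust() is applied twice: once against prev_cputime1 to
 * split total runtime into user+nice vs. system, then against
 * prev_cputime2 to split the user+nice share into user vs. nice.
 */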
static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
		struct task_group *tg, struct cpuacct_usage_result *res)
{
	struct kernel_cpustat *kcpustat;
	struct cpuacct_usage *cpuusage;
	struct task_cputime cputime;
	u64 tick_user, tick_nice, tick_sys, left, right;
	struct sched_entity *se;

	kcpustat = per_cpu_ptr(ca->cpustat, cpu);
	if (unlikely(!tg)) {
		memset(res, 0, sizeof(*res));
		return;
	}

	se = tg->se[cpu];
	cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	tick_user = kcpustat->cpustat[CPUTIME_USER];
	tick_nice = kcpustat->cpustat[CPUTIME_NICE];
	tick_sys = kcpustat->cpustat[CPUTIME_SYSTEM];

	/* Calculate system run time */
	cputime.sum_exec_runtime = cpuusage->usages[CPUACCT_STAT_USER] +
			cpuusage->usages[CPUACCT_STAT_SYSTEM];
	cputime.utime = tick_user + tick_nice;
	cputime.stime = tick_sys;
	cputime_adjust(&cputime, &cpuusage->prev_cputime1, &left, &right);
	res->system = right;

	/* Calculate user and nice run time */
	cputime.sum_exec_runtime = left; /* user + nice */
	cputime.utime = tick_user;
	cputime.stime = tick_nice;
	cputime_adjust(&cputime, &cpuusage->prev_cputime2, &left, &right);
	res->user = left;
	res->nice = right;

	res->irq = kcpustat->cpustat[CPUTIME_IRQ];
	res->softirq = kcpustat->cpustat[CPUTIME_SOFTIRQ];
	if (se)
		res->steal = se->statistics.wait_sum;
	else
		res->steal = 0;
	res->guest = kcpustat->cpustat[CPUTIME_GUEST];
	res->guest_nice = kcpustat->cpustat[CPUTIME_GUEST_NICE];
	/*
	 * Nothing in this file fills in per-group idle/iowait time; zero
	 * the fields so that cpuacct_proc_stats_show() never sums
	 * uninitialized stack data.
	 */
	res->idle = 0;
	res->iowait = 0;
}

static int cpuacct_proc_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	struct cgroup *cgrp = seq_css(sf)->cgroup;
	u64 user, nice, system, idle, iowait, irq, softirq, steal, guest;
	int cpu;

	user = nice = system = idle = iowait =
		irq = softirq = steal = guest = 0;

	if (ca != &root_cpuacct) {
		struct cpuacct_usage_result res;

		for_each_possible_cpu(cpu) {
			if (!housekeeping_cpu(cpu, HK_FLAG_DOMAIN))
				continue;

			rcu_read_lock();
			__cpuacct_get_usage_result(ca, cpu,
					cgroup_tg(cgrp), &res);
			rcu_read_unlock();

			user += res.user;
			nice += res.nice;
			system += res.system;
			irq += res.irq;
			softirq += res.softirq;
			steal += res.steal;
			guest += res.guest;
			guest += res.guest_nice;
			iowait += res.iowait;
			idle += res.idle;
		}
	} else {
		struct kernel_cpustat *kcpustat;

		for_each_possible_cpu(cpu) {
			kcpustat = per_cpu_ptr(ca->cpustat, cpu);
			user += kcpustat->cpustat[CPUTIME_USER];
			nice += kcpustat->cpustat[CPUTIME_NICE];
			system += kcpustat->cpustat[CPUTIME_SYSTEM];
			irq += kcpustat->cpustat[CPUTIME_IRQ];
			softirq += kcpustat->cpustat[CPUTIME_SOFTIRQ];
			guest += kcpustat->cpustat[CPUTIME_GUEST];
			guest += kcpustat->cpustat[CPUTIME_GUEST_NICE];
			idle += get_idle_time(cpu);
			iowait += get_iowait_time(cpu);
			steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
		}
	}

	seq_printf(sf, "user %lld\n", nsec_to_clock_t(user));
	seq_printf(sf, "nice %lld\n", nsec_to_clock_t(nice));
	seq_printf(sf, "system %lld\n", nsec_to_clock_t(system));
	seq_printf(sf, "idle %lld\n", nsec_to_clock_t(idle));
	seq_printf(sf, "iowait %lld\n", nsec_to_clock_t(iowait));
	seq_printf(sf, "irq %lld\n", nsec_to_clock_t(irq));
	seq_printf(sf, "softirq %lld\n", nsec_to_clock_t(softirq));
	seq_printf(sf, "steal %lld\n", nsec_to_clock_t(steal));
	seq_printf(sf, "guest %lld\n", nsec_to_clock_t(guest));

	return 0;
}
#endif

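/*
 * Control files exported by the controller.  With cgroup v1 mounted at,
 * say, /sys/fs/cgroup/cpuacct (the mount point may differ), these show
 * up as cpuacct.usage, cpuacct.stat, cpuacct.usage_percpu and friends:
 *
 *   cat /sys/fs/cgroup/cpuacct/mygroup/cpuacct.usage
 *   echo 0 > /sys/fs/cgroup/cpuacct/mygroup/cpuacct.usage
 */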
static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
#ifdef CONFIG_SCHED_SLI
	{
		.name = "proc_stat",
		.seq_show = cpuacct_proc_stats_show,
	},
#endif
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = task_pt_regs(tsk);

	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

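	/* Charge every group in the hierarchy, from the task's up to the root */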
	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
492 493
		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.legacy_cftypes	= files,
	.early_init	= true,
};

#ifdef CONFIG_PSI

static bool psi_v1_enable;
static int __init setup_psi_v1(char *str)
{
	return kstrtobool(str, &psi_v1_enable) == 0;
}
__setup("psi_v1=", setup_psi_v1);

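/*
 * PSI files are exposed on cgroup v1 only when psi_v1=1 is passed on
 * the kernel command line; otherwise v1 PSI is statically disabled.
 */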
static int __init cgroup_v1_psi_init(void)
{
	if (!psi_v1_enable) {
		static_branch_enable(&psi_v1_disabled);
		return 0;
	}

	cgroup_add_legacy_cftypes(&cpuacct_cgrp_subsys, cgroup_v1_psi_files);
	return 0;
}

late_initcall_sync(cgroup_v1_psi_init);
#endif