// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

struct cpuacct_usage {
	u64	usages[CPUACCT_STAT_NSTATS];
	struct prev_cputime prev_cputime1; /* utime and stime */
	struct prev_cputime prev_cputime2; /* user and nice */
} ____cacheline_aligned;

/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state	css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	struct cpuacct_usage __percpu	*cpuusage;
	struct kernel_cpustat __percpu	*cpustat;

	ALI_HOTFIX_RESERVE(1)
	ALI_HOTFIX_RESERVE(2)
	ALI_HOTFIX_RESERVE(3)
	ALI_HOTFIX_RESERVE(4)
};

static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

/* Return CPU accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	return css_ca(ca->css.parent);
}

static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};
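
/*
 * Note: the root group points ->cpustat at the global kernel_cpustat, so
 * root-level accounting reuses the system-wide counters rather than a
 * separately allocated per-CPU copy.
 */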

/* Create a new CPU accounting group */
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cpuacct *ca;
	int i;

	if (!parent_css)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	for_each_possible_cpu(i) {
		prev_cputime_init(&per_cpu_ptr(ca->cpuusage, i)->prev_cputime1);
		prev_cputime_init(&per_cpu_ptr(ca->cpuusage, i)->prev_cputime2);
	}

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
	struct cpuacct *ca = css_ca(css);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_stat_index index)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

	/*
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of usages.
	 */
	BUG_ON(index > CPUACCT_STAT_NSTATS);

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

/* Return total CPU usage (in nanoseconds) of a group */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
	int i;

	for_each_possible_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

	return totalcpuusage;
}

static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}
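/*
 * Illustrative userspace interaction (assuming the usual cgroup v1 mount
 * point /sys/fs/cgroup/cpuacct; "mygroup" is a placeholder):
 *
 *   cat /sys/fs/cgroup/cpuacct/mygroup/cpuacct.usage       # total, in ns
 *   cat /sys/fs/cgroup/cpuacct/mygroup/cpuacct.usage_user  # user, in ns
 *   cat /sys/fs/cgroup/cpuacct/mygroup/cpuacct.usage_sys   # system, in ns
 *   echo 0 > /sys/fs/cgroup/cpuacct/mygroup/cpuacct.usage  # reset counters
 *
 * The reset path is cpuusage_write() below, which only accepts '0'.
 */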

static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	/*
	 * Only allow '0' here to do a reset.
	 */
	if (val)
		return -EINVAL;

	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);

	return 0;
}

static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
	int i;

	for_each_possible_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i, index);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}
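/*
 * cpuacct.usage_percpu{,_user,_sys} print one value per possible CPU,
 * space separated on a single line, e.g. (numbers purely illustrative):
 *
 *   83137472 69782835 105713023 0
 */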

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}
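/*
 * cpuacct.usage_all prints a header row followed by one row per possible
 * CPU, e.g. (numbers purely illustrative):
 *
 *   cpu user system
 *   0 83137472 20111536
 *   1 69782835 18992710
 */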

static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	s64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	int stat;

	memset(val, 0, sizeof(val));
	for_each_possible_cpu(cpu) {
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
	}

	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   (long long)nsec_to_clock_t(val[stat]));
	}

	return 0;
}
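/*
 * cpuacct.stat reports the group's user and system time in USER_HZ clock
 * ticks (see nsec_to_clock_t() above), e.g. (numbers purely illustrative):
 *
 *   user 4217
 *   system 1538
 */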

#ifdef CONFIG_SCHED_SLI
#ifndef arch_idle_time
#define arch_idle_time(cpu) 0
#endif

static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
{
	return container_of(global_cgroup_css(cgrp, cpu_cgrp_id),
				struct task_group, css);
}

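/*
 * Fill *res with /proc/stat-style components for @ca on @cpu.
 *
 * cputime_adjust() is applied twice: the first pass (with prev_cputime1)
 * splits the group's total execution time into a (user + nice) share and
 * a system share, and the second pass (with prev_cputime2) splits the
 * (user + nice) share into user vs. nice.  "left"/"right" are assumed to
 * receive the two adjusted components from the cputime_adjust() variant
 * used in this tree.
 */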
static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
		struct task_group *tg, struct cpuacct_usage_result *res)
{
	struct kernel_cpustat *kcpustat;
	struct cpuacct_usage *cpuusage;
	struct task_cputime cputime;
	u64 tick_user, tick_nice, tick_sys, left, right;
	struct sched_entity *se;

	kcpustat = per_cpu_ptr(ca->cpustat, cpu);
	if (unlikely(!tg)) {
		memset(res, 0, sizeof(*res));
		return;
	}

	se = tg->se[cpu];
	cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	tick_user = kcpustat->cpustat[CPUTIME_USER];
	tick_nice = kcpustat->cpustat[CPUTIME_NICE];
	tick_sys = kcpustat->cpustat[CPUTIME_SYSTEM];

	/* Calculate system run time */
	cputime.sum_exec_runtime = cpuusage->usages[CPUACCT_STAT_USER] +
			cpuusage->usages[CPUACCT_STAT_SYSTEM];
	cputime.utime = tick_user + tick_nice;
	cputime.stime = tick_sys;
	cputime_adjust(&cputime, &cpuusage->prev_cputime1, &left, &right);
	res->system = right;

	/* Calculate user and nice run time */
	cputime.sum_exec_runtime = left; /* user + nice */
	cputime.utime = tick_user;
	cputime.stime = tick_nice;
	cputime_adjust(&cputime, &cpuusage->prev_cputime2, &left, &right);
	res->user = left;
	res->nice = right;

	res->irq = kcpustat->cpustat[CPUTIME_IRQ];
	res->softirq = kcpustat->cpustat[CPUTIME_SOFTIRQ];
	if (se)
		res->steal = se->statistics.wait_sum;
	else
		res->steal = 0;
	res->guest = res->guest_nice = 0; /* currently always 0 */
}

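/*
 * cpuacct.proc_stat: per-cgroup counters in the style of the "cpu" fields
 * of /proc/stat, one "<name> <value>" line each for user, nice, system,
 * idle, iowait, irq, softirq, steal and guest, in USER_HZ clock ticks.
 */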
static int cpuacct_proc_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	struct cgroup *cgrp = seq_css(sf)->cgroup;
	u64 user, nice, system, idle, iowait, irq, softirq, steal, guest;
	int cpu;

	user = nice = system = idle = iowait =
		irq = softirq = steal = guest = 0;

	if (ca != &root_cpuacct) {
		struct cpuacct_usage_result res;

		for_each_possible_cpu(cpu) {
			if (!housekeeping_cpu(cpu, HK_FLAG_DOMAIN))
				continue;

			rcu_read_lock();
			__cpuacct_get_usage_result(ca, cpu,
					cgroup_tg(cgrp), &res);
			rcu_read_unlock();

			user += res.user;
			nice += res.nice;
			system += res.system;
			irq += res.irq;
			softirq += res.softirq;
			steal += res.steal;
			guest += res.guest;
			iowait += res.iowait;
			idle += res.idle;
		}
	} else {
		struct kernel_cpustat *kcpustat;

		for_each_possible_cpu(cpu) {
			kcpustat = per_cpu_ptr(ca->cpustat, cpu);
			user += kcpustat->cpustat[CPUTIME_USER];
			nice += kcpustat->cpustat[CPUTIME_NICE];
			system += kcpustat->cpustat[CPUTIME_SYSTEM];
			irq += kcpustat->cpustat[CPUTIME_IRQ];
			softirq += kcpustat->cpustat[CPUTIME_SOFTIRQ];
			guest += kcpustat->cpustat[CPUTIME_GUEST];
			idle += get_idle_time(cpu);
			iowait += get_iowait_time(cpu);
			steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
		}
	}

	seq_printf(sf, "user %lld\n", nsec_to_clock_t(user));
	seq_printf(sf, "nice %lld\n", nsec_to_clock_t(nice));
	seq_printf(sf, "system %lld\n", nsec_to_clock_t(system));
	seq_printf(sf, "idle %lld\n", nsec_to_clock_t(idle));
	seq_printf(sf, "iowait %lld\n", nsec_to_clock_t(iowait));
	seq_printf(sf, "irq %lld\n", nsec_to_clock_t(irq));
	seq_printf(sf, "softirq %lld\n", nsec_to_clock_t(softirq));
	seq_printf(sf, "steal %lld\n", nsec_to_clock_t(steal));
	seq_printf(sf, "guest %lld\n", nsec_to_clock_t(guest));

	return 0;
}
#endif

static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
#ifdef CONFIG_SCHED_SLI
	{
		.name = "proc_stat",
		.seq_show = cpuacct_proc_stats_show,
	},
#endif
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = task_pt_regs(tsk);

	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.legacy_cftypes	= files,
	.early_init	= true,
};

#ifdef CONFIG_PSI

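/*
 * PSI files are only added to the cgroup v1 (cpuacct) hierarchy when the
 * kernel is booted with e.g. "psi_v1=1" (any value kstrtobool() accepts).
 */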
static bool psi_v1_enable;
static int __init setup_psi_v1(char *str)
{
	return kstrtobool(str, &psi_v1_enable) == 0;
}
__setup("psi_v1=", setup_psi_v1);

static int __init cgroup_v1_psi_init(void)
{
	if (!psi_v1_enable) {
		static_branch_enable(&psi_v1_disabled);
		return 0;
	}

	cgroup_add_legacy_cftypes(&cpuacct_cgrp_subsys, cgroup_v1_psi_files);
	return 0;
}

late_initcall_sync(cgroup_v1_psi_init);
#endif