// SPDX-License-Identifier: GPL-2.0
/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */
#include "sched.h"

/* Time spent by the tasks of the CPU accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

struct cpuacct_usage {
	u64	usages[CPUACCT_STAT_NSTATS];
};

/* track CPU usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state	css;
	/* cpuusage holds one struct cpuacct_usage (u64 usage counters) per CPU */
	struct cpuacct_usage __percpu	*cpuusage;
	struct kernel_cpustat __percpu	*cpustat;

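	/* reserved fields for hotfix extension (likely vendor kABI padding) */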
	ALI_HOTFIX_RESERVE(1)
	ALI_HOTFIX_RESERVE(2)
	ALI_HOTFIX_RESERVE(3)
	ALI_HOTFIX_RESERVE(4)
};

static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuacct, css) : NULL;
}

/* Return CPU accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return css_ca(task_css(tsk, cpuacct_cgrp_id));
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	return css_ca(ca->css.parent);
}

static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
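/* The root group's cpustat aliases the system-wide kernel_cpustat */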
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};

/* Create a new CPU accounting group */
static struct cgroup_subsys_state *
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cpuacct *ca;

	if (!parent_css)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
	struct cpuacct *ca = css_ca(css);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_stat_index index)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

	/*
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of usages.
	 */
	BUG_ON(index > CPUACCT_STAT_NSTATS);

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
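	 * Writers (cpuacct_charge()) update these counters with rq->lock
	 * held, so taking the same lock here avoids a torn read.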
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
	}

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif

	return data;
}

static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	int i;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}

/* Return total CPU usage (in nanoseconds) of a group */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
	int i;

	for_each_possible_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);

	return totalcpuusage;
}

static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}

static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	struct cpuacct *ca = css_ca(css);
	int cpu;

	/*
	 * Only allow '0' here to do a reset.
	 */
	if (val)
		return -EINVAL;

	for_each_possible_cpu(cpu)
		cpuacct_cpuusage_write(ca, cpu, 0);

	return 0;
}

static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
	int i;

	for_each_possible_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i, index);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}

static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	s64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	int stat;

	memset(val, 0, sizeof(val));
	for_each_possible_cpu(cpu) {
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
	}

	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   (long long)nsec_to_clock_t(val[stat]));
	}

	return 0;
}

static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
	{ }	/* terminate */
};
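
/*
 * Illustrative cgroup v1 usage (paths and values are examples only):
 *
 *   # cat /sys/fs/cgroup/cpuacct/grp/cpuacct.usage       # total, in ns
 *   36659979123
 *   # cat /sys/fs/cgroup/cpuacct/grp/cpuacct.stat        # in USER_HZ ticks
 *   user 2396
 *   system 333
 *   # echo 0 > /sys/fs/cgroup/cpuacct/grp/cpuacct.usage  # reset counters
 */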

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = task_pt_regs(tsk);

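	/* Without user-mode regs (e.g. a kernel thread), charge as system. */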
	if (regs && user_mode(regs))
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
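 * The walk below therefore stops before &root_cpuacct, whose cpustat
 * aliases the global kernel_cpustat.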
 */
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
{
	struct cpuacct *ca;

	rcu_read_lock();
	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.legacy_cftypes	= files,
	.early_init	= true,
};

#ifdef CONFIG_PSI

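/*
 * Expose the PSI files on this v1 hierarchy only when the kernel is booted
 * with psi_v1=1; otherwise flip the psi_v1_disabled static branch.
 */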
static bool psi_v1_enable;
static int __init setup_psi_v1(char *str)
{
	return kstrtobool(str, &psi_v1_enable) == 0;
}
__setup("psi_v1=", setup_psi_v1);

static int __init cgroup_v1_psi_init(void)
{
	if (!psi_v1_enable) {
		static_branch_enable(&psi_v1_disabled);
		return 0;
	}

	cgroup_add_legacy_cftypes(&cpuacct_cgrp_subsys, cgroup_v1_psi_files);
	return 0;
}

late_initcall_sync(cgroup_v1_psi_init);
#endif