/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
						  struct cgroup *);
static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
			      struct cgroup_taskset *);
static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
			   struct cgroup_taskset *);
static int blkiocg_pre_destroy(struct cgroup_subsys *, struct cgroup *);
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);

/* for encoding cft->private value on file */
#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
/* What policy owns the file, proportional or throttle */
#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
#define BLKIOFILE_ATTR(val)		((val) & 0xffff)
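
/*
 * Worked example (illustrative): the "throttle.read_bps_device" entry in
 * blkio_files[] below uses
 * BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_read_bps_device),
 * which packs the owning policy into the upper 16 bits of cft->private and
 * the per-policy attribute into the lower 16 bits; BLKIOFILE_POLICY() and
 * BLKIOFILE_ATTR() recover the two halves in the file read/write handlers.
 */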

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
	.subsys_id = blkio_subsys_id,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
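
/*
 * Note (descriptive): a bio only carries an explicit cgroup association
 * when bio->bi_css is set; for everything else this falls back to the
 * cgroup of the submitting task via task_blkio_cgroup(current).
 */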

static inline void blkio_update_group_weight(struct blkio_group *blkg,
					     int plid, unsigned int weight)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;
		if (blkiop->ops.blkio_update_group_weight_fn)
			blkiop->ops.blkio_update_group_weight_fn(blkg->q,
							blkg, weight);
	}
}

static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid,
					  u64 bps, int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;

		if (fileid == BLKIO_THROTL_read_bps_device
		    && blkiop->ops.blkio_update_group_read_bps_fn)
			blkiop->ops.blkio_update_group_read_bps_fn(blkg->q,
								blkg, bps);

		if (fileid == BLKIO_THROTL_write_bps_device
		    && blkiop->ops.blkio_update_group_write_bps_fn)
			blkiop->ops.blkio_update_group_write_bps_fn(blkg->q,
								blkg, bps);
	}
}

static inline void blkio_update_group_iops(struct blkio_group *blkg,
					   int plid, unsigned int iops,
					   int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;

		if (fileid == BLKIO_THROTL_read_iops_device
		    && blkiop->ops.blkio_update_group_read_iops_fn)
			blkiop->ops.blkio_update_group_read_iops_fn(blkg->q,
								blkg, iops);

		if (fileid == BLKIO_THROTL_write_iops_device
		    && blkiop->ops.blkio_update_group_write_iops_fn)
			blkiop->ops.blkio_update_group_write_iops_fn(blkg->q,
								blkg, iops);
	}
}

/*
 * Add to the appropriate stat variable depending on the request type.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
				bool sync)
{
	if (direction)
		stat[BLKIO_STAT_WRITE] += add;
	else
		stat[BLKIO_STAT_READ] += add;
	if (sync)
		stat[BLKIO_STAT_SYNC] += add;
	else
		stat[BLKIO_STAT_ASYNC] += add;
}

/*
 * Decrements the appropriate stat variable if non-zero depending on the
 * request type. Panics on value being zero.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
{
	if (direction) {
		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
		stat[BLKIO_STAT_WRITE]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_READ] == 0);
		stat[BLKIO_STAT_READ]--;
	}
	if (sync) {
		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
		stat[BLKIO_STAT_SYNC]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
		stat[BLKIO_STAT_ASYNC]--;
	}
}

#ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the blkg->stats_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					    struct blkio_policy_type *pol,
					    struct blkio_group *curr_blkg)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];

	if (blkio_blkg_waiting(&pd->stats))
		return;
	if (blkg == curr_blkg)
		return;
	pd->stats.start_group_wait_time = sched_clock();
	blkio_mark_blkg_waiting(&pd->stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
		stats->group_wait_time += now - stats->start_group_wait_time;
	blkio_clear_blkg_waiting(stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_end_empty_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
		stats->empty_time += now - stats->start_empty_time;
	blkio_clear_blkg_empty(stats);
}

void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	BUG_ON(blkio_blkg_idling(&pd->stats));
	pd->stats.start_idle_time = sched_clock();
	blkio_mark_blkg_idling(&pd->stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);

void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;
	unsigned long long now;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;
	if (blkio_blkg_idling(stats)) {
		now = sched_clock();
		if (time_after64(now, stats->start_idle_time))
			stats->idle_time += now - stats->start_idle_time;
		blkio_clear_blkg_idling(stats);
	}
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);

void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;
	stats->avg_queue_size_sum +=
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
	stats->avg_queue_size_samples++;
	blkio_update_group_wait_time(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);

void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;

	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	/*
	 * The group is already marked empty. This can happen if a cfqq got a
	 * new request in the parent group and moved to this group while being
	 * added to the service tree. Just ignore the event and move on.
	 */
	if (blkio_blkg_empty(stats)) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	stats->start_empty_time = sched_clock();
	blkio_mark_blkg_empty(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);

void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];

	pd->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_policy_type *pol,
					struct blkio_group *curr_blkg) { }
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { }
#endif

void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
			sync);
	blkio_end_empty_time(&pd->stats);
	blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);

void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_check_and_dec_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED],
					direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);

void blkiocg_update_timeslice_used(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	pd->stats.time += time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	pd->stats.unaccounted_time += unaccounted_time;
#endif
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

/*
 * should be called under rcu read lock or queue lock to make sure blkg pointer
 * is valid.
 */
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(pd->stats_cpu);

	u64_stats_update_begin(&stats_cpu->syncp);
	stats_cpu->sectors += bytes >> 9;
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
			1, direction, sync);
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
			bytes, direction, sync);
	u64_stats_update_end(&stats_cpu->syncp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);

void blkiocg_update_completion_stats(struct blkio_group *blkg,
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats *stats;
	unsigned long flags;
	unsigned long long now = sched_clock();

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;
	if (time_after64(now, io_start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
				now - io_start_time, direction, sync);
	if (time_after64(io_start_time, start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
				io_start_time - start_time, direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);

/* Merged stats are per cpu. */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(pd->stats_cpu);

	u64_stats_update_begin(&stats_cpu->syncp);
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
				direction, sync);
	u64_stats_update_end(&stats_cpu->syncp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkg_policy_data *pd = blkg->pd[i];

		if (pd) {
			free_percpu(pd->stats_cpu);
			kfree(pd);
		}
	}

	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 *
 * FIXME: Should be called with queue locked but currently isn't due to
 *        percpu stat breakage.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	spin_lock_init(&blkg->stats_lock);
	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;

		/* broken, read comment in the callsite */
		pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
		if (!pd->stats_cpu) {
			blkg_free(blkg);
			return NULL;
		}
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}

struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg, *new_blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 * The following can be removed if blkg lookup is guaranteed to
	 * fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 *
	 * FIXME: The following is broken.  Percpu memory allocation
	 * requires %GFP_KERNEL context and can't be performed from IO
	 * path.  Allocation here should inherently be atomic and the
	 * following lock dancing can be removed once the broken percpu
	 * allocation is fixed.
	 */
	spin_unlock_irq(q->queue_lock);
	rcu_read_unlock();

	new_blkg = blkg_alloc(blkcg, q);

	rcu_read_lock();
	spin_lock_irq(q->queue_lock);

	/* did bypass get turned on inbetween? */
	if (unlikely(blk_queue_bypass(q)) && !for_root) {
		blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
		goto out;
	}

	/* did someone beat us to it? */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(blkg))
		goto out;

	/* did alloc fail? */
	if (unlikely(!new_blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	swap(blkg, new_blkg);

	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);

	spin_unlock(&blkcg->lock);
out:
	blkg_free(new_blkg);
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
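
/*
 * Usage sketch (illustrative; see blkio_policy_parse_and_set() below for
 * the in-tree pattern): callers hold rcu_read_lock() and the queue lock
 * around the call and must be prepared for ERR_PTR() returns:
 *
 *	rcu_read_lock();
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
 *	spin_unlock_irq(q->queue_lock);
 *	if (IS_ERR(blkg))
 *		ret = PTR_ERR(blkg);
 *	rcu_read_unlock();
 */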

/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[plid]);
	blkg->pd[plid] = NULL;

	if (!pol)
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
	WARN_ON_ONCE(!pd->stats_cpu);

	blkg->pd[plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in rcu manner. But having an rcu lock does not
	 * mean that one can access all the fields of blkg and assume these
	 * are valid. For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under rcu only allows access to
	 * values local to the group, such as group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);
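
/*
 * Reference model (sketch): every blkg starts with a single reference
 * taken in blkg_alloc() and dropped by blkg_destroy().  Code that needs a
 * blkg to stay around beyond the locked section is expected to pair
 * blkg_get() with blkg_put() (helpers in blk-cgroup.h); the final
 * blkg_put() ends up here and frees the group after an RCU grace period.
 */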

static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	struct blkio_group_stats_cpu *stats_cpu;
	int i, j, k;
	/*
	 * Note: on a 64 bit arch this is not an issue.  On 32 bit, readers
	 * may see an inconsistent value because a 64 bit update is not
	 * atomic there.  Handling that corner case would complicate the
	 * code a lot (sending IPIs to cpus, tracking stats of offline cpus,
	 * etc.).
	 *
	 * Resetting stats is more of a debug feature anyway and this is a
	 * corner case, so don't complicate the code until it becomes a real
	 * issue.
	 */
	for_each_possible_cpu(i) {
		stats_cpu = per_cpu_ptr(pd->stats_cpu, i);
		stats_cpu->sectors = 0;
		for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
			for (k = 0; k < BLKIO_STAT_TOTAL; k++)
				stats_cpu->stat_arr_cpu[j][k] = 0;
	}
}

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct blkio_group_stats *stats;
	struct hlist_node *n;
	uint64_t queued[BLKIO_STAT_TOTAL];
	int i;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	bool idling, waiting, empty;
	unsigned long long now = sched_clock();
#endif

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkio_policy_type *pol;

		list_for_each_entry(pol, &blkio_list, list) {
			struct blkg_policy_data *pd = blkg->pd[pol->plid];

			spin_lock(&blkg->stats_lock);
			stats = &pd->stats;
#ifdef CONFIG_DEBUG_BLK_CGROUP
			idling = blkio_blkg_idling(stats);
			waiting = blkio_blkg_waiting(stats);
			empty = blkio_blkg_empty(stats);
#endif
			for (i = 0; i < BLKIO_STAT_TOTAL; i++)
				queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
			memset(stats, 0, sizeof(struct blkio_group_stats));
			for (i = 0; i < BLKIO_STAT_TOTAL; i++)
				stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
#ifdef CONFIG_DEBUG_BLK_CGROUP
			if (idling) {
				blkio_mark_blkg_idling(stats);
				stats->start_idle_time = now;
			}
			if (waiting) {
				blkio_mark_blkg_waiting(stats);
				stats->start_group_wait_time = now;
			}
			if (empty) {
				blkio_mark_blkg_empty(stats);
				stats->start_empty_time = now;
			}
#endif
			spin_unlock(&blkg->stats_lock);

			/* Reset per-cpu stats which don't take blkg->stats_lock */
			blkio_reset_stats_cpu(blkg, pol->plid);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
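
/*
 * Illustrative usage (the mount point and group name are assumptions):
 *
 *	# echo 1 > /sys/fs/cgroup/blkio/grp/blkio.reset_stats
 *
 * Writing any value to reset_stats ends up here and clears both the
 * blkg->stats_lock protected stats and the per-cpu counters for every
 * registered policy of every group in the cgroup.
 */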

static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
			       char *str, int chars_left, bool diskname_only)
{
	snprintf(str, chars_left, "%s", dname);
	chars_left -= strlen(str);
	if (chars_left <= 0) {
		printk(KERN_WARNING
			"Possibly incorrect cgroup stat display format");
		return;
	}
	if (diskname_only)
		return;
	switch (type) {
	case BLKIO_STAT_READ:
		strlcat(str, " Read", chars_left);
		break;
	case BLKIO_STAT_WRITE:
		strlcat(str, " Write", chars_left);
		break;
	case BLKIO_STAT_SYNC:
		strlcat(str, " Sync", chars_left);
		break;
	case BLKIO_STAT_ASYNC:
		strlcat(str, " Async", chars_left);
		break;
	case BLKIO_STAT_TOTAL:
		strlcat(str, " Total", chars_left);
		break;
	default:
		strlcat(str, " Invalid", chars_left);
	}
}
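
/*
 * Example keys (illustrative): for a device named "8:16" this builds
 * "8:16 Read", "8:16 Write", "8:16 Sync", "8:16 Async" and "8:16 Total",
 * or just "8:16" when diskname_only is set; these are the keys handed to
 * cb->fill() for the per-device lines of the stat files.
 */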

static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
				struct cgroup_map_cb *cb, const char *dname)
{
	blkio_get_key_name(0, dname, str, chars_left, true);
	cb->fill(cb, str, val);
	return val;
}


static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
			enum stat_type_cpu type, enum stat_sub_type sub_type)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	int cpu;
	struct blkio_group_stats_cpu *stats_cpu;
	u64 val = 0, tval;

	for_each_possible_cpu(cpu) {
		unsigned int start;
		stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);

		do {
			start = u64_stats_fetch_begin(&stats_cpu->syncp);
			if (type == BLKIO_STAT_CPU_SECTORS)
				tval = stats_cpu->sectors;
			else
				tval = stats_cpu->stat_arr_cpu[type][sub_type];
		} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));

		val += tval;
	}

	return val;
}

static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
				   struct cgroup_map_cb *cb, const char *dname,
				   enum stat_type_cpu type)
{
	uint64_t disk_total, val;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_CPU_SECTORS) {
		val = blkio_read_stat_cpu(blkg, plid, type, 0);
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb,
				       dname);
	}

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
				   false);
		val = blkio_read_stat_cpu(blkg, plid, type, sub_type);
		cb->fill(cb, key_str, val);
	}

	disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) +
		blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE);

	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
			   false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}

/* This should be called with blkg->stats_lock held */
static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid,
			       struct cgroup_map_cb *cb, const char *dname,
			       enum stat_type type)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	uint64_t disk_total;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.time, cb, dname);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       pd->stats.unaccounted_time, cb, dname);
	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
		uint64_t sum = pd->stats.avg_queue_size_sum;
		uint64_t samples = pd->stats.avg_queue_size_samples;
		if (samples)
			do_div(sum, samples);
		else
			sum = 0;
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       sum, cb, dname);
	}
	if (type == BLKIO_STAT_GROUP_WAIT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       pd->stats.group_wait_time, cb, dname);
	if (type == BLKIO_STAT_IDLE_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       pd->stats.idle_time, cb, dname);
	if (type == BLKIO_STAT_EMPTY_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       pd->stats.empty_time, cb, dname);
	if (type == BLKIO_STAT_DEQUEUE)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       pd->stats.dequeue, cb, dname);
#endif

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
				   false);
		cb->fill(cb, key_str, pd->stats.stat_arr[type][sub_type]);
	}
	disk_total = pd->stats.stat_arr[type][BLKIO_STAT_READ] +
			pd->stats.stat_arr[type][BLKIO_STAT_WRITE];
	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
			   false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}

static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid,
				      int fileid, struct blkio_cgroup *blkcg)
{
	struct gendisk *disk = NULL;
	struct blkio_group *blkg = NULL;
	struct blkg_policy_data *pd;
	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
	unsigned long major, minor;
	int i = 0, ret = -EINVAL;
	int part;
	dev_t dev;
	u64 temp;

	memset(s, 0, sizeof(s));

	while ((p = strsep(&buf, " ")) != NULL) {
		if (!*p)
			continue;

		s[i++] = p;

		/* Prevent inputting too many fields */
		if (i == 3)
			break;
	}

	if (i != 2)
		goto out;

	p = strsep(&s[0], ":");
	if (p != NULL)
		major_s = p;
	else
		goto out;

	minor_s = s[0];
	if (!minor_s)
		goto out;

	if (strict_strtoul(major_s, 10, &major))
		goto out;

	if (strict_strtoul(minor_s, 10, &minor))
		goto out;

	dev = MKDEV(major, minor);

	if (strict_strtoull(s[1], 10, &temp))
		goto out;

	disk = get_gendisk(dev, &part);
	if (!disk || part)
		goto out;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, plid, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto out_unlock;
	}

	pd = blkg->pd[plid];

	switch (plid) {
	case BLKIO_POLICY_PROP:
		if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
		     temp > BLKIO_WEIGHT_MAX)
			goto out_unlock;

		pd->conf.weight = temp;
		blkio_update_group_weight(blkg, plid, temp ?: blkcg->weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch(fileid) {
		case BLKIO_THROTL_read_bps_device:
			pd->conf.bps[READ] = temp;
			blkio_update_group_bps(blkg, plid, temp ?: -1, fileid);
			break;
		case BLKIO_THROTL_write_bps_device:
			pd->conf.bps[WRITE] = temp;
			blkio_update_group_bps(blkg, plid, temp ?: -1, fileid);
			break;
		case BLKIO_THROTL_read_iops_device:
			if (temp > THROTL_IOPS_MAX)
				goto out_unlock;
			pd->conf.iops[READ] = temp;
			blkio_update_group_iops(blkg, plid, temp ?: -1, fileid);
			break;
		case BLKIO_THROTL_write_iops_device:
			if (temp > THROTL_IOPS_MAX)
				goto out_unlock;
			pd->conf.iops[WRITE] = temp;
			blkio_update_group_iops(blkg, plid, temp ?: -1, fileid);
			break;
		}
		break;
	default:
		BUG();
	}
	ret = 0;
out_unlock:
	rcu_read_unlock();
out:
	put_disk(disk);

	/*
	 * If the queue was bypassing, we should retry.  Do so after a short
	 * msleep().  It isn't strictly necessary but the queue can stay in
	 * bypass mode for some time and it's always nice to avoid busy
	 * looping.
	 */
	if (ret == -EBUSY) {
		msleep(10);
		return restart_syscall();
	}
	return ret;
}
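
/*
 * The accepted input is "major:minor value", e.g. (illustrative, device
 * numbers made up):
 *
 *	# echo "8:16 1048576" > blkio.throttle.read_bps_device
 *
 * limits reads on device 8:16 to 1 MB/s for this cgroup.  Writing 0
 * clears the per-device setting: the printed conf skips zero values and
 * the policy update is passed temp ?: -1 (or the cgroup default weight
 * for the proportional policy).
 */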

static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
 				       const char *buffer)
{
	int ret = 0;
	char *buf;
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	buf = kstrdup(buffer, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = blkio_policy_parse_and_set(buf, plid, fileid, blkcg);
	kfree(buf);
	return ret;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg,
				   struct seq_file *m)
{
	int plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);
	struct blkg_policy_data *pd = blkg->pd[plid];
	const char *dname = blkg_dev_name(blkg);
	int rw = WRITE;

	if (!dname)
		return;

	switch (plid) {
		case BLKIO_POLICY_PROP:
			if (pd->conf.weight)
				seq_printf(m, "%s\t%u\n",
					   dname, pd->conf.weight);
			break;
		case BLKIO_POLICY_THROTL:
			switch (fileid) {
			case BLKIO_THROTL_read_bps_device:
				rw = READ;
			case BLKIO_THROTL_write_bps_device:
				if (pd->conf.bps[rw])
					seq_printf(m, "%s\t%llu\n",
						   dname, pd->conf.bps[rw]);
				break;
			case BLKIO_THROTL_read_iops_device:
				rw = READ;
			case BLKIO_THROTL_write_iops_device:
				if (pd->conf.iops[rw])
					seq_printf(m, "%s\t%u\n",
						   dname, pd->conf.iops[rw]);
				break;
			}
			break;
		default:
			BUG();
	}
}

/* cgroup files which read their data from policy nodes end up here */
static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg,
			    struct seq_file *m)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
		blkio_print_group_conf(cft, blkg, m);
	spin_unlock_irq(&blkcg->lock);
}

static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
				struct seq_file *m)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch(plid) {
	case BLKIO_POLICY_PROP:
		switch(name) {
		case BLKIO_PROP_weight_device:
			blkio_read_conf(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch(name){
		case BLKIO_THROTL_read_bps_device:
		case BLKIO_THROTL_write_bps_device:
		case BLKIO_THROTL_read_iops_device:
		case BLKIO_THROTL_write_iops_device:
			blkio_read_conf(cft, blkcg, m);
			return 0;
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}

static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
		struct cftype *cft, struct cgroup_map_cb *cb,
		enum stat_type type, bool show_total, bool pcpu)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	uint64_t cgroup_total = 0;

	spin_lock_irq(&blkcg->lock);

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		const char *dname = blkg_dev_name(blkg);
		int plid = BLKIOFILE_POLICY(cft->private);

		if (!dname)
			continue;
		if (pcpu) {
			cgroup_total += blkio_get_stat_cpu(blkg, plid,
							   cb, dname, type);
		} else {
			spin_lock(&blkg->stats_lock);
			cgroup_total += blkio_get_stat(blkg, plid,
						       cb, dname, type);
			spin_unlock(&blkg->stats_lock);
		}
	}
	if (show_total)
		cb->fill(cb, "Total", cgroup_total);

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

/* All map kind of cgroup file get serviced by this function */
static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
				struct cgroup_map_cb *cb)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch(plid) {
	case BLKIO_POLICY_PROP:
		switch(name) {
		case BLKIO_PROP_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_TIME, 0, 0);
		case BLKIO_PROP_sectors:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SECTORS, 0, 1);
		case BLKIO_PROP_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
					BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
		case BLKIO_PROP_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICED, 1, 1);
		case BLKIO_PROP_io_service_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_SERVICE_TIME, 1, 0);
		case BLKIO_PROP_io_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_WAIT_TIME, 1, 0);
		case BLKIO_PROP_io_merged:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_MERGED, 1, 1);
		case BLKIO_PROP_io_queued:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_QUEUED, 1, 0);
#ifdef CONFIG_DEBUG_BLK_CGROUP
		case BLKIO_PROP_unaccounted_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
					BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
		case BLKIO_PROP_dequeue:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_DEQUEUE, 0, 0);
		case BLKIO_PROP_avg_queue_size:
			return blkio_read_blkg_stats(blkcg, cft, cb,
					BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
		case BLKIO_PROP_group_wait_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
					BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
		case BLKIO_PROP_idle_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_IDLE_TIME, 0, 0);
		case BLKIO_PROP_empty_time:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_EMPTY_TIME, 0, 0);
#endif
		default:
			BUG();
		}
		break;
	case BLKIO_POLICY_THROTL:
		switch(name){
		case BLKIO_THROTL_io_service_bytes:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
		case BLKIO_THROTL_io_serviced:
			return blkio_read_blkg_stats(blkcg, cft, cb,
						BLKIO_STAT_CPU_SERVICED, 1, 1);
		default:
			BUG();
		}
		break;
	default:
		BUG();
	}

	return 0;
}
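
/*
 * Illustrative output (numbers made up) of one of the map files serviced
 * above, e.g. blkio.io_service_bytes for a group doing I/O on 8:16:
 *
 *	8:16 Read 1310720
 *	8:16 Write 0
 *	8:16 Sync 1310720
 *	8:16 Async 0
 *	8:16 Total 1310720
 *	Total 1310720
 *
 * The per-device keys come from blkio_get_key_name(); the trailing
 * "Total" line is the cgroup-wide sum added by blkio_read_blkg_stats()
 * when show_total is set.
 */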

static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkg_policy_data *pd = blkg->pd[plid];

		if (!pd->conf.weight)
			blkio_update_group_weight(blkg, plid, blkcg->weight);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
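
/*
 * Illustrative usage (mount point and group name are assumptions):
 *
 *	# echo 500 > /sys/fs/cgroup/blkio/grp/blkio.weight
 *
 * updates the cgroup-wide default weight and, via the loop above, pushes
 * it to every group that has no per-device override (!pd->conf.weight).
 */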

static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch(plid) {
	case BLKIO_POLICY_PROP:
		switch(name) {
		case BLKIO_PROP_weight:
			return (u64)blkcg->weight;
		}
		break;
	default:
		BUG();
	}
	return 0;
}

static int
blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	struct blkio_cgroup *blkcg;
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int name = BLKIOFILE_ATTR(cft->private);

	blkcg = cgroup_to_blkio_cgroup(cgrp);

	switch(plid) {
	case BLKIO_POLICY_PROP:
		switch(name) {
		case BLKIO_PROP_weight:
			return blkio_weight_write(blkcg, plid, val);
		}
		break;
	default:
		BUG();
	}

	return 0;
}

struct cftype blkio_files[] = {
	{
		.name = "weight_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_weight),
		.read_u64 = blkiocg_file_read_u64,
		.write_u64 = blkiocg_file_write_u64,
	},
	{
		.name = "time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "sectors",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_sectors),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_service_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_service_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_merged",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_merged),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "io_queued",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_io_queued),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
#ifdef CONFIG_BLK_DEV_THROTTLING
	{
		.name = "throttle.read_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},

	{
		.name = "throttle.write_bps_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},

	{
		.name = "throttle.read_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},

	{
		.name = "throttle.write_iops_device",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device),
		.read_seq_string = blkiocg_file_read,
		.write_string = blkiocg_file_write,
		.max_write_len = 256,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_service_bytes),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "throttle.io_serviced",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
				BLKIO_THROTL_io_serviced),
		.read_map = blkiocg_file_read_map,
	},
#endif /* CONFIG_BLK_DEV_THROTTLING */

#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_avg_queue_size),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "group_wait_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_group_wait_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "idle_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_idle_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "empty_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_empty_time),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "dequeue",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_dequeue),
		.read_map = blkiocg_file_read_map,
	},
	{
		.name = "unaccounted_time",
		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
				BLKIO_PROP_unaccounted_time),
		.read_map = blkiocg_file_read_map,
	},
#endif
};

static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, blkio_files,
				ARRAY_SIZE(blkio_files));
}

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @subsys: cgroup subsys
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and responsible
 * for shooting down all blkgs associated with @cgroup.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
			       struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	int ret;

	might_sleep();

	ret = blk_throtl_init(q);
	if (ret)
		return ret;

	mutex_lock(&all_q_mutex);
	INIT_LIST_HEAD(&q->all_q_node);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return 0;
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);

	blkg_destroy_all(q, true);

	blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no mean to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			      struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			   struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;

	cgroup_taskset_for_each(task, cgrp, tset) {
		/* we don't lose anything even if ioc allocation fails */
		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
		if (ioc) {
			ioc_cgroup_changed(ioc);
			put_io_context(ioc);
		}
	}
}

static void blkcg_bypass_start(void)
	__acquires(&all_q_mutex)
{
	struct request_queue *q;

	mutex_lock(&all_q_mutex);

	list_for_each_entry(q, &all_q_list, all_q_node) {
		blk_queue_bypass_start(q);
		blkg_destroy_all(q, false);
	}
}

static void blkcg_bypass_end(void)
	__releases(&all_q_mutex)
{
	struct request_queue *q;

	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_queue_bypass_end(q);

	mutex_unlock(&all_q_mutex);
}

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid]);
	blkio_policy[blkiop->plid] = blkiop;
	list_add_tail(&blkiop->list, &blkio_list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_register);
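
/*
 * Registration sketch (illustrative; cfq-iosched.c and blk-throttle.c are
 * the real users, and the "foo" names are hypothetical): a policy
 * describes itself with a blkio_policy_type and registers it at init
 * time:
 *
 *	static struct blkio_policy_type blkio_policy_foo = {
 *		.ops = {
 *			.blkio_init_group_fn = foo_init_blkio_group,
 *		},
 *		.plid = BLKIO_POLICY_PROP,
 *		.pdata_size = sizeof(struct foo_group),
 *	};
 *
 *	blkio_policy_register(&blkio_policy_foo);
 */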

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid] != blkiop);
	blkio_policy[blkiop->plid] = NULL;
	list_del_init(&blkiop->list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);