#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/u64_stats_sync.h>

enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,		/* Throttling */

	BLKIO_NR_POLICIES,
};

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum stat_type {
	/* Number of IOs merged */
	BLKIO_STAT_MERGED,
	/* Total time spent (in ns) between request dispatch to the driver and
	 * request completion for IOs done by this cgroup. This may not be
	 * accurate when NCQ is turned on. */
	BLKIO_STAT_SERVICE_TIME,
	/* Total time spent waiting in scheduler queue in ns */
	BLKIO_STAT_WAIT_TIME,
	/* Number of IOs queued up */
	BLKIO_STAT_QUEUED,
	/* All the single valued stats go below this */
	BLKIO_STAT_TIME,
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Time not charged to this cgroup */
	BLKIO_STAT_UNACCOUNTED_TIME,
	BLKIO_STAT_AVG_QUEUE_SIZE,
	BLKIO_STAT_IDLE_TIME,
	BLKIO_STAT_EMPTY_TIME,
	BLKIO_STAT_GROUP_WAIT_TIME,
	BLKIO_STAT_DEQUEUE
#endif
};

/* Per cpu stats */
enum stat_type_cpu {
	BLKIO_STAT_CPU_SECTORS,
	/* Total bytes transferred */
	BLKIO_STAT_CPU_SERVICE_BYTES,
	/* Total IOs serviced, post merge */
	BLKIO_STAT_CPU_SERVICED,
	BLKIO_STAT_CPU_NR
};

enum stat_sub_type {
	BLKIO_STAT_READ = 0,
	BLKIO_STAT_WRITE,
	BLKIO_STAT_SYNC,
	BLKIO_STAT_ASYNC,
	BLKIO_STAT_TOTAL
};

/* blkg state flags */
enum blkg_state_flags {
	BLKG_waiting = 0,
	BLKG_idling,
	BLKG_empty,
};

/* cgroup files owned by proportional weight policy */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight = 1,
	BLKIO_PROP_weight_device,
	BLKIO_PROP_io_service_bytes,
	BLKIO_PROP_io_serviced,
	BLKIO_PROP_time,
	BLKIO_PROP_sectors,
	BLKIO_PROP_unaccounted_time,
	BLKIO_PROP_io_service_time,
	BLKIO_PROP_io_wait_time,
	BLKIO_PROP_io_merged,
	BLKIO_PROP_io_queued,
	BLKIO_PROP_avg_queue_size,
	BLKIO_PROP_group_wait_time,
	BLKIO_PROP_idle_time,
	BLKIO_PROP_empty_time,
	BLKIO_PROP_dequeue,
};

/* cgroup files owned by throttle policy */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device,
	BLKIO_THROTL_write_bps_device,
	BLKIO_THROTL_read_iops_device,
	BLKIO_THROTL_write_iops_device,
	BLKIO_THROTL_io_service_bytes,
	BLKIO_THROTL_io_serviced,
};

struct blkio_cgroup {
	struct cgroup_subsys_state css;
	unsigned int weight;
	spinlock_t lock;
	struct hlist_head blkg_list;
};

struct blkio_group_stats {
	/* total disk time and nr sectors dispatched by this group */
	uint64_t time;
	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Time not charged to this cgroup */
	uint64_t unaccounted_time;

	/* Sum of number of IOs queued across all samples */
	uint64_t avg_queue_size_sum;
	/* Count of samples taken for average */
	uint64_t avg_queue_size_samples;
	/* How many times this group has been removed from service tree */
	unsigned long dequeue;

	/* Total time spent waiting for it to be assigned a timeslice. */
	uint64_t group_wait_time;

	/* Time spent idling for this blkio_group */
	uint64_t idle_time;
	/*
	 * Total time during which this group had requests queued but was
	 * not the currently active queue.
	 */
	uint64_t empty_time;

	/* fields after this shouldn't be cleared on stat reset */
	uint64_t start_group_wait_time;
	uint64_t start_idle_time;
	uint64_t start_empty_time;
	uint16_t flags;
#endif
};

#ifdef CONFIG_DEBUG_BLK_CGROUP
#define BLKG_STATS_DEBUG_CLEAR_START	\
	offsetof(struct blkio_group_stats, unaccounted_time)
#define BLKG_STATS_DEBUG_CLEAR_SIZE	\
	(offsetof(struct blkio_group_stats, start_group_wait_time) - \
	 BLKG_STATS_DEBUG_CLEAR_START)
#endif
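
/*
 * Usage sketch (illustrative): this is how a stat-reset path could use
 * the offsets above to clear only the debug fields that are safe to
 * reset, leaving the start_* timestamps and flags intact:
 *
 *	memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0,
 *	       BLKG_STATS_DEBUG_CLEAR_SIZE);
 */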

/* Per cpu blkio group stats */
struct blkio_group_stats_cpu {
	uint64_t sectors;
	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
	struct u64_stats_sync syncp;
};
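
/*
 * Update sketch (illustrative; "pd" and "nr_sectors" are placeholders
 * for the caller's context): writers bracket per-cpu updates with
 * u64_stats_update_begin()/end() on syncp so 64bit counters can be
 * read consistently on 32bit machines:
 *
 *	struct blkio_group_stats_cpu *stats_cpu;
 *
 *	stats_cpu = this_cpu_ptr(pd->stats_cpu);
 *	u64_stats_update_begin(&stats_cpu->syncp);
 *	stats_cpu->sectors += nr_sectors;
 *	u64_stats_update_end(&stats_cpu->syncp);
 */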

struct blkio_group_conf {
	unsigned int weight;
	unsigned int iops[2];
	u64 bps[2];
};

/* per-blkg per-policy data */
struct blkg_policy_data {
	/* the blkg this per-policy data belongs to */
	struct blkio_group *blkg;

	/* Configuration */
	struct blkio_group_conf conf;

	struct blkio_group_stats stats;
	/* Per cpu stats pointer */
	struct blkio_group_stats_cpu __percpu *stats_cpu;

	/* pol->pdata_size bytes of private data used by policy impl */
	char pdata[] __aligned(__alignof__(unsigned long long));
};

struct blkio_group {
	/* Pointer to the associated request_queue */
	struct request_queue *q;
	struct list_head q_node;
	struct hlist_node blkcg_node;
	struct blkio_cgroup *blkcg;
	/* Store cgroup path */
	char path[128];
	/* reference count */
	int refcnt;

	/* Need to serialize the stats in the case of reset/update */
	spinlock_t stats_lock;
	struct blkg_policy_data *pd[BLKIO_NR_POLICIES];

	/* List of blkg waiting for per cpu stats memory to be allocated */
	struct list_head alloc_node;
	struct rcu_head rcu_head;
};

typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int weight);
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 read_bps);
typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 write_bps);
typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int read_iops);
typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int write_iops);

struct blkio_policy_ops {
	blkio_init_group_fn *blkio_init_group_fn;
	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
	blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
};

struct blkio_policy_type {
	struct list_head list;
	struct blkio_policy_ops ops;
	enum blkio_policy_id plid;
	size_t pdata_size;		/* policy specific private data size */
};
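
/*
 * Registration sketch (illustrative; the "example" names are
 * hypothetical): a policy fills in its callbacks, policy id and the
 * size of its per-blkg private data, then registers itself.  The core
 * allocates pdata_size bytes per blkg and calls the init hook.
 *
 *	static struct blkio_policy_type example_policy;
 *
 *	struct example_grp {
 *		unsigned int weight;
 *	};
 *
 *	static void example_init_group(struct blkio_group *blkg)
 *	{
 *		struct example_grp *eg = blkg_to_pdata(blkg, &example_policy);
 *
 *		eg->weight = BLKIO_WEIGHT_DEFAULT;
 *	}
 *
 *	static struct blkio_policy_type example_policy = {
 *		.ops		= { .blkio_init_group_fn = example_init_group },
 *		.plid		= BLKIO_POLICY_PROP,
 *		.pdata_size	= sizeof(struct example_grp),
 *	};
 *
 *	blkio_policy_register(&example_policy);
 */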

extern int blkcg_init_queue(struct request_queue *q);
extern void blkcg_drain_queue(struct request_queue *q);
extern void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
extern void blkg_destroy_all(struct request_queue *q, bool destroy_root);
extern void update_root_blkg_pd(struct request_queue *q,
				enum blkio_policy_id plid);

/**
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline void *blkg_to_pdata(struct blkio_group *blkg,
			      struct blkio_policy_type *pol)
{
	return blkg ? blkg->pd[pol->plid]->pdata : NULL;
}

/**
 * pdata_to_blkg - get blkg associated with policy private data
 * @pdata: policy private data of interest
 * @pol: policy @pdata is for
 *
 * @pdata is policy private data for @pol.  Determine the blkg it's
 * associated with.
 */
static inline struct blkio_group *pdata_to_blkg(void *pdata,
						struct blkio_policy_type *pol)
{
	if (pdata) {
		struct blkg_policy_data *pd =
			container_of(pdata, struct blkg_policy_data, pdata);
		return pd->blkg;
	}
	return NULL;
}
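
/*
 * The two helpers above are inverses: a policy can hand out just its
 * private data pointer and later recover the owning blkg.  Round trip:
 *
 *	void *pdata = blkg_to_pdata(blkg, pol);
 *
 *	if (pdata)
 *		WARN_ON(pdata_to_blkg(pdata, pol) != blkg);
 */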

static inline char *blkg_path(struct blkio_group *blkg)
{
	return blkg->path;
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding queue_lock and an existing reference.
 */
static inline void blkg_get(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(!blkg->refcnt);
	blkg->refcnt++;
}

void __blkg_release(struct blkio_group *blkg);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 *
 * The caller should be holding queue_lock.
 */
static inline void blkg_put(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(blkg->refcnt <= 0);
	if (!--blkg->refcnt)
		__blkg_release(blkg);
}
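
/*
 * Usage sketch (illustrative; "q" is blkg->q and the helper name is a
 * placeholder): take a reference under queue_lock before dropping the
 * lock, and put it back after reacquiring.  The final blkg_put()
 * invokes __blkg_release().
 *
 *	spin_lock_irq(q->queue_lock);
 *	blkg_get(blkg);
 *	spin_unlock_irq(q->queue_lock);
 *
 *	use_blkg_without_queue_lock(blkg);
 *
 *	spin_lock_irq(q->queue_lock);
 *	blkg_put(blkg);
 *	spin_unlock_irq(q->queue_lock);
 */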

#else

struct blkio_group {
};

struct blkio_policy_type {
};

static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
326 327
static inline void blkg_destroy_all(struct request_queue *q,
				    bool destroy_root) { }
static inline void update_root_blkg_pd(struct request_queue *q,
				       enum blkio_policy_id plid) { }

static inline void *blkg_to_pdata(struct blkio_group *blkg,
				struct blkio_policy_type *pol) { return NULL; }
static inline struct blkio_group *pdata_to_blkg(void *pdata,
				struct blkio_policy_type *pol) { return NULL; }
static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
static inline void blkg_get(struct blkio_group *blkg) { }
static inline void blkg_put(struct blkio_group *blkg) { }

#endif

#define BLKIO_WEIGHT_MIN	10
#define BLKIO_WEIGHT_MAX	1000
#define BLKIO_WEIGHT_DEFAULT	500

#ifdef CONFIG_DEBUG_BLK_CGROUP
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol);
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue);
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol);
void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol);
void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol);

#define BLKG_FLAG_FNS(name)						\
static inline void blkio_mark_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags |= (1 << BLKG_##name);				\
}									\
static inline void blkio_clear_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags &= ~(1 << BLKG_##name);				\
}									\
static inline int blkio_blkg_##name(struct blkio_group_stats *stats)	\
{									\
	return (stats->flags & (1 << BLKG_##name)) != 0;		\
}									\

BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
BLKG_FLAG_FNS(empty)
#undef BLKG_FLAG_FNS
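
/*
 * BLKG_FLAG_FNS(waiting) above generates blkio_mark_blkg_waiting(),
 * blkio_clear_blkg_waiting() and blkio_blkg_waiting(), all operating
 * on the BLKG_waiting bit of stats->flags (likewise for idling and
 * empty).  Illustrative use when a group starts waiting for a
 * timeslice (the timestamp source here is an assumption):
 *
 *	if (!blkio_blkg_waiting(stats)) {
 *		stats->start_group_wait_time = sched_clock();
 *		blkio_mark_blkg_waiting(stats);
 *	}
 */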
#else
static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long dequeue) { }
static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
			struct blkio_policy_type *pol) { }
#endif

#ifdef CONFIG_BLK_CGROUP
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio);
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				       struct request_queue *q);
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid,
				       bool for_root);
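
/*
 * Lookup sketch (illustrative; error handling elided): blkg_lookup()
 * is intended to run under rcu_read_lock(), while creating a missing
 * group additionally requires queue_lock:
 *
 *	rcu_read_lock();
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
 *	spin_unlock_irq(q->queue_lock);
 *	rcu_read_unlock();
 */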
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time);
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync);
void blkiocg_update_completion_stats(struct blkio_group *blkg,
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync);
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync);
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
#else
struct cgroup;
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
static inline struct blkio_cgroup *
bio_blkio_cgroup(struct bio *bio) { return NULL; }

static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      struct request_queue *q) { return NULL; }
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
			struct blkio_policy_type *pol, unsigned long time,
			unsigned long unaccounted_time) { }
static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t bytes,
			bool direction, bool sync) { }
static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, uint64_t start_time,
			uint64_t io_start_time, bool direction, bool sync) { }
static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, bool direction,
			bool sync) { }
static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol,
			struct blkio_group *curr_blkg, bool direction,
			bool sync) { }
static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
			struct blkio_policy_type *pol, bool direction,
			bool sync) { }
#endif
#endif /* _BLK_CGROUP_H */