#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/u64_stats_sync.h>

/* Policies implemented on top of the blkio controller */
enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,		/* Throttling */

	BLKIO_NR_POLICIES,		/* must be last: number of policies */
};

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

/* Per-group stats indexed into blkio_group_stats */
enum stat_type {
	/* Total time spent (in ns) between request dispatch to the driver and
	 * request completion for IOs done by this cgroup. This may not be
	 * accurate when NCQ is turned on. */
	BLKIO_STAT_SERVICE_TIME = 0,
	/* Total time spent waiting in scheduler queue in ns */
	BLKIO_STAT_WAIT_TIME,
	/* Number of IOs queued up */
	BLKIO_STAT_QUEUED,
	/* All the single valued stats go below this */
	BLKIO_STAT_TIME,
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* Time not charged to this cgroup */
	BLKIO_STAT_UNACCOUNTED_TIME,
	BLKIO_STAT_AVG_QUEUE_SIZE,
	BLKIO_STAT_IDLE_TIME,
	BLKIO_STAT_EMPTY_TIME,
	BLKIO_STAT_GROUP_WAIT_TIME,
	BLKIO_STAT_DEQUEUE
#endif
};

/* Per cpu stats */
enum stat_type_cpu {
	BLKIO_STAT_CPU_SECTORS,
	/* Total bytes transferred */
	BLKIO_STAT_CPU_SERVICE_BYTES,
	/* Total IOs serviced, post merge */
	BLKIO_STAT_CPU_SERVICED,
	/* Number of IOs merged */
	BLKIO_STAT_CPU_MERGED,
	BLKIO_STAT_CPU_NR		/* must be last: number of cpu stats */
};

/* Per-direction/sync sub-buckets for each stat above */
enum stat_sub_type {
	BLKIO_STAT_READ = 0,
	BLKIO_STAT_WRITE,
	BLKIO_STAT_SYNC,
	BLKIO_STAT_ASYNC,
	BLKIO_STAT_TOTAL		/* must be last: number of sub types */
};

/* blkg state flags (bit positions in blkio_group_stats.flags) */
enum blkg_state_flags {
	BLKG_waiting = 0,
	BLKG_idling,
	BLKG_empty,
};

/* cgroup files owned by proportional weight policy */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight = 1,
	BLKIO_PROP_weight_device,
	BLKIO_PROP_io_service_bytes,
	BLKIO_PROP_io_serviced,
	BLKIO_PROP_time,
	BLKIO_PROP_sectors,
	BLKIO_PROP_unaccounted_time,
	BLKIO_PROP_io_service_time,
	BLKIO_PROP_io_wait_time,
	BLKIO_PROP_io_merged,
	BLKIO_PROP_io_queued,
	BLKIO_PROP_avg_queue_size,
	BLKIO_PROP_group_wait_time,
	BLKIO_PROP_idle_time,
	BLKIO_PROP_empty_time,
	BLKIO_PROP_dequeue,
};

/* cgroup files owned by throttle policy */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device,
	BLKIO_THROTL_write_bps_device,
	BLKIO_THROTL_read_iops_device,
	BLKIO_THROTL_write_iops_device,
	BLKIO_THROTL_io_service_bytes,
	BLKIO_THROTL_io_serviced,
};

110 111 112 113 114 115 116
struct blkio_cgroup {
	struct cgroup_subsys_state css;
	unsigned int weight;
	spinlock_t lock;
	struct hlist_head blkg_list;
};

117 118 119
struct blkio_group_stats {
	/* total disk time and nr sectors dispatched by this group */
	uint64_t time;
120
	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
121
#ifdef CONFIG_DEBUG_BLK_CGROUP
122 123 124
	/* Time not charged to this cgroup */
	uint64_t unaccounted_time;

125 126 127 128
	/* Sum of number of IOs queued across all samples */
	uint64_t avg_queue_size_sum;
	/* Count of samples taken for average */
	uint64_t avg_queue_size_samples;
129 130
	/* How many times this group has been removed from service tree */
	unsigned long dequeue;
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145

	/* Total time spent waiting for it to be assigned a timeslice. */
	uint64_t group_wait_time;
	uint64_t start_group_wait_time;

	/* Time spent idling for this blkio_group */
	uint64_t idle_time;
	uint64_t start_idle_time;
	/*
	 * Total time when we have requests queued and do not contain the
	 * current active queue.
	 */
	uint64_t empty_time;
	uint64_t start_empty_time;
	uint16_t flags;
146 147 148
#endif
};

149 150 151 152
/* Per cpu blkio group stats */
struct blkio_group_stats_cpu {
	uint64_t sectors;
	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL];
153
	struct u64_stats_sync syncp;
154 155
};

156 157 158 159 160 161
struct blkio_group_conf {
	unsigned int weight;
	unsigned int iops[2];
	u64 bps[2];
};

162 163 164 165 166 167 168 169 170
/* per-blkg per-policy data */
struct blkg_policy_data {
	/* the blkg this per-policy data belongs to */
	struct blkio_group *blkg;

	/* pol->pdata_size bytes of private data used by policy impl */
	char pdata[] __aligned(__alignof__(unsigned long long));
};

171
struct blkio_group {
172 173
	/* Pointer to the associated request_queue, RCU protected */
	struct request_queue __rcu *q;
174
	struct hlist_node blkcg_node;
175
	struct blkio_cgroup *blkcg;
V
Vivek Goyal 已提交
176 177
	/* Store cgroup path */
	char path[128];
178 179
	/* policy which owns this blk group */
	enum blkio_policy_id plid;
180

181 182 183
	/* Configuration */
	struct blkio_group_conf conf;

184 185 186
	/* Need to serialize the stats in the case of reset/update */
	spinlock_t stats_lock;
	struct blkio_group_stats stats;
187 188
	/* Per cpu stats pointer */
	struct blkio_group_stats_cpu __percpu *stats_cpu;
189 190

	struct blkg_policy_data *pd;
191 192
};

193
typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
194 195
typedef void (blkio_link_group_fn)(struct request_queue *q,
			struct blkio_group *blkg);
196 197
typedef void (blkio_unlink_group_fn)(struct request_queue *q,
			struct blkio_group *blkg);
198
typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
199
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
200
			struct blkio_group *blkg, unsigned int weight);
201
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
202
			struct blkio_group *blkg, u64 read_bps);
203
typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
204
			struct blkio_group *blkg, u64 write_bps);
205
typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
206
			struct blkio_group *blkg, unsigned int read_iops);
207
typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
208
			struct blkio_group *blkg, unsigned int write_iops);
209 210

struct blkio_policy_ops {
211
	blkio_init_group_fn *blkio_init_group_fn;
212
	blkio_link_group_fn *blkio_link_group_fn;
213
	blkio_unlink_group_fn *blkio_unlink_group_fn;
214
	blkio_clear_queue_fn *blkio_clear_queue_fn;
215
	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
216 217
	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
218 219
	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
	blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
220 221 222 223 224
};

struct blkio_policy_type {
	struct list_head list;
	struct blkio_policy_ops ops;
225
	enum blkio_policy_id plid;
226
	size_t pdata_size;		/* policy specific private data size */
227 228
};

/* request_queue lifecycle hooks for the blkio controller */
extern int blkcg_init_queue(struct request_queue *q);
extern void blkcg_drain_queue(struct request_queue *q);
extern void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *blkiop);
extern void blkio_policy_unregister(struct blkio_policy_type *blkiop);
extern void blkg_destroy_all(struct request_queue *q);

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
/**
 * blkg_to_pdata - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline void *blkg_to_pdata(struct blkio_group *blkg,
			      struct blkio_policy_type *pol)
{
	return blkg ? blkg->pd->pdata : NULL;
}

/**
 * pdata_to_blkg - get blkg associated with policy private data
 * @pdata: policy private data of interest
 * @pol: policy @pdata is for
 *
 * @pdata is policy private data for @pol.  Determine the blkg it's
 * associated with.  Returns NULL when @pdata is NULL.
 */
static inline struct blkio_group *pdata_to_blkg(void *pdata,
						struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd;

	if (!pdata)
		return NULL;
	pd = container_of(pdata, struct blkg_policy_data, pdata);
	return pd->blkg;
}

270 271 272 273 274
static inline char *blkg_path(struct blkio_group *blkg)
{
	return blkg->path;
}

#else	/* CONFIG_BLK_CGROUP */

/* Empty placeholder types when the blkio controller is compiled out */
struct blkio_group {
};

struct blkio_policy_type {
};

/* No-op stubs when the blkio controller is compiled out */
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
static inline void blkg_destroy_all(struct request_queue *q) { }

static inline void *blkg_to_pdata(struct blkio_group *blkg,
				struct blkio_policy_type *pol) { return NULL; }
static inline struct blkio_group *pdata_to_blkg(void *pdata,
				struct blkio_policy_type *pol) { return NULL; }
static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }

#endif	/* CONFIG_BLK_CGROUP */

/* Valid range and default for the proportional-weight policy */
#define BLKIO_WEIGHT_MIN	10
#define BLKIO_WEIGHT_MAX	1000
#define BLKIO_WEIGHT_DEFAULT	500

#ifdef CONFIG_DEBUG_BLK_CGROUP
/* Debug-only stat updaters (queue size, dequeue, idle/empty tracking) */
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg);
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				unsigned long dequeue);
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg);
void blkiocg_update_idle_time_stats(struct blkio_group *blkg);
void blkiocg_set_start_empty_time(struct blkio_group *blkg);

/*
 * Generate mark/clear/test helpers for each enum blkg_state_flags bit:
 * blkio_mark_blkg_<name>() sets the bit in stats->flags,
 * blkio_clear_blkg_<name>() clears it, and blkio_blkg_<name>()
 * returns non-zero if it is set.
 */
#define BLKG_FLAG_FNS(name)						\
static inline void blkio_mark_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags |= (1 << BLKG_##name);				\
}									\
static inline void blkio_clear_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags &= ~(1 << BLKG_##name);				\
}									\
static inline int blkio_blkg_##name(struct blkio_group_stats *stats)	\
{									\
	return (stats->flags & (1 << BLKG_##name)) != 0;		\
}									\

BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
BLKG_FLAG_FNS(empty)
#undef BLKG_FLAG_FNS
#else	/* CONFIG_DEBUG_BLK_CGROUP */
/* No-op stubs when debug stats are compiled out */
static inline void blkiocg_update_avg_queue_size_stats(
						struct blkio_group *blkg) {}
static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
						unsigned long dequeue) {}
static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
{}
static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {}
static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
#endif	/* CONFIG_DEBUG_BLK_CGROUP */

#ifdef CONFIG_BLK_CGROUP
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid);
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid,
				       bool for_root);
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
					unsigned long time,
					unsigned long unaccounted_time);
void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
						bool direction, bool sync);
void blkiocg_update_completion_stats(struct blkio_group *blkg,
	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync);
void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
					bool sync);
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
		struct blkio_group *curr_blkg, bool direction, bool sync);
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
					bool direction, bool sync);
366
#else
367
struct cgroup;
368 369
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
370 371
static inline struct blkio_cgroup *
task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
372 373 374 375

static inline int
blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }

376 377
static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      void *key) { return NULL; }
378
static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
379 380 381
						unsigned long time,
						unsigned long unaccounted_time)
{}
382 383 384 385 386
static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				uint64_t bytes, bool direction, bool sync) {}
static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
		uint64_t start_time, uint64_t io_start_time, bool direction,
		bool sync) {}
D
Divyesh Shah 已提交
387 388
static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
						bool direction, bool sync) {}
389
static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
390
		struct blkio_group *curr_blkg, bool direction, bool sync) {}
391
static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
392
						bool direction, bool sync) {}
393 394
#endif
#endif /* _BLK_CGROUP_H */