Commit bc58ba94 authored by Jens Axboe

block: add sysfs file for controlling io stats accounting

This allows us to turn off disk stat accounting completely, for the cases
where the 0.5-1% reduction in system time is important.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Parent 7598909e
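For context on what the patch exposes: each request queue gains an `iostats` attribute under /sys/block/<device>/queue/, backed by QUEUE_FLAG_IO_STAT. A minimal user-space sketch of flipping it follows; the device name "sda", the hard-coded path, and the bare-bones error handling are illustrative assumptions, not part of the patch:

/*
 * Illustrative sketch only: toggle the iostats attribute added by this
 * patch from user space. Assumes a kernel with this patch applied and
 * that "sda" is a valid block device on the system.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/iostats";
	FILE *f = fopen(path, "w");	/* typically requires root */

	if (!f) {
		perror(path);
		return 1;
	}
	fputs("0\n", f);		/* "0" disables, "1" re-enables */
	return fclose(f) ? 1 : 0;
}

Writing 0 clears QUEUE_FLAG_IO_STAT, which short-circuits the accounting in drive_stat_acct() and the completion paths below; writing 1 restores the default behaviour.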
block/blk-core.c
@@ -64,11 +64,12 @@ static struct workqueue_struct *kblockd_workqueue;
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
+	struct gendisk *disk = rq->rq_disk;
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !rq->rq_disk)
+	if (!blk_fs_request(rq) || !disk || !blk_queue_io_stat(disk->queue))
 		return;
 
 	cpu = part_stat_lock();
@@ -599,8 +600,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
-	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER |
-				   1 << QUEUE_FLAG_STACKABLE);
+	q->queue_flags		= QUEUE_FLAG_DEFAULT;
 	q->queue_lock		= lock;
 
 	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
@@ -1663,6 +1663,55 @@ void blkdev_dequeue_request(struct request *req)
 }
 EXPORT_SYMBOL(blkdev_dequeue_request);
 
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
+{
+	struct gendisk *disk = req->rq_disk;
+
+	if (!disk || !blk_queue_io_stat(disk->queue))
+		return;
+
+	if (blk_fs_request(req)) {
+		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_unlock();
+	}
+}
+
+static void blk_account_io_done(struct request *req)
+{
+	struct gendisk *disk = req->rq_disk;
+
+	if (!disk || !blk_queue_io_stat(disk->queue))
+		return;
+
+	/*
+	 * Account IO completion. bar_rq isn't accounted as a normal
+	 * IO on queueing nor completion. Accounting the containing
+	 * request is enough.
+	 */
+	if (blk_fs_request(req) && req != &req->q->bar_rq) {
+		unsigned long duration = jiffies - req->start_time;
+		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
+	}
+}
+
 /**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
@@ -1698,16 +1747,7 @@ static int __end_that_request_first(struct request *req, int error,
 				(unsigned long long)req->sector);
 	}
 
-	if (blk_fs_request(req) && req->rq_disk) {
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
-		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
-		part_stat_unlock();
-	}
+	blk_account_io_completion(req, nr_bytes);
 
 	total_bytes = bio_nbytes = 0;
 	while ((bio = req->bio) != NULL) {
@@ -1787,8 +1827,6 @@ static int __end_that_request_first(struct request *req, int error,
  */
 static void end_that_request_last(struct request *req, int error)
 {
-	struct gendisk *disk = req->rq_disk;
-
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1800,27 +1838,7 @@ static void end_that_request_last(struct request *req, int error)
 
 	blk_delete_timer(req);
 
-	/*
-	 * Account IO completion. bar_rq isn't accounted as a normal
-	 * IO on queueing nor completion. Accounting the containing
-	 * request is enough.
-	 */
-	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
-		unsigned long duration = jiffies - req->start_time;
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(disk, req->sector);
-
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
-		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
-
-		part_stat_unlock();
-	}
+	blk_account_io_done(req);
 
 	if (req->end_io)
 		req->end_io(req, error);
block/blk-sysfs.c
@@ -197,6 +197,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 	return ret;
 }
 
+static ssize_t queue_iostats_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_io_stat(q), page);
+}
+
+static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
+				   size_t count)
+{
+	unsigned long stats;
+	ssize_t ret = queue_var_store(&stats, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	if (stats)
+		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -249,6 +270,12 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = {
 	.store = queue_rq_affinity_store,
 };
 
+static struct queue_sysfs_entry queue_iostats_entry = {
+	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_iostats_show,
+	.store = queue_iostats_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -259,6 +286,7 @@ static struct attribute *default_attrs[] = {
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
+	&queue_iostats_entry.attr,
 	NULL,
 };
include/linux/blkdev.h
@@ -451,6 +451,11 @@ struct request_queue
 #define QUEUE_FLAG_STACKABLE   13	/* supports request stacking */
 #define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
+#define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
+
+#define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |	\
+				 (1 << QUEUE_FLAG_CLUSTER) |	\
+				 (1 << QUEUE_FLAG_STACKABLE))
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -567,6 +572,7 @@ enum {
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
+#define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)