diff --git a/block/elevator.c b/block/elevator.c index ec23ca02f2fed9e57a8977c36cd331a36cd5fbf9..952aee04a68ad7cc240b8ecb7a8a0e1590b9a33f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -712,6 +712,14 @@ struct request *elv_next_request(struct request_queue *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { + /* + * Kill the empty barrier place holder, the driver must + * not ever see it. + */ + if (blk_empty_barrier(rq)) { + end_queued_request(rq, 1); + continue; + } if (!(rq->cmd_flags & REQ_STARTED)) { /* * This is the first time the device driver diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 548f0d8266792ed03e75b6d1e93552de2bb747f7..4fde3a3c92d3119ba8d60aff9e0db5570d23d1a6 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -458,9 +458,12 @@ static inline struct request *start_ordered(struct request_queue *q, * Queue ordered sequence. As we stack them at the head, we * need to queue in reverse order. Note that we rely on that * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. + * request gets inbetween ordered sequence. If this request is + * an empty barrier, we don't need to do a postflush ever since + * there will be no data written between the pre and post flush. + * Hence a single flush will suffice. */ - if (q->ordered & QUEUE_ORDERED_POSTFLUSH) + if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) queue_flush(q, QUEUE_ORDERED_POSTFLUSH); else q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; @@ -484,7 +487,7 @@ static inline struct request *start_ordered(struct request_queue *q, int blk_do_ordered(struct request_queue *q, struct request **rqp) { struct request *rq = *rqp; - int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); + const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); if (!q->ordseq) { if (!is_barrier) @@ -3054,7 +3057,7 @@ static inline void blk_partition_remap(struct bio *bio) { struct block_device *bdev = bio->bi_bdev; - if (bdev != bdev->bd_contains) { + if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; const int rw = bio_data_dir(bio); @@ -3313,23 +3316,32 @@ void submit_bio(int rw, struct bio *bio) { int count = bio_sectors(bio); - BIO_BUG_ON(!bio->bi_size); - BIO_BUG_ON(!bio->bi_io_vec); bio->bi_rw |= rw; - if (rw & WRITE) { - count_vm_events(PGPGOUT, count); - } else { - task_io_account_read(bio->bi_size); - count_vm_events(PGPGIN, count); - } - if (unlikely(block_dump)) { - char b[BDEVNAME_SIZE]; - printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", - current->comm, current->pid, - (rw & WRITE) ? "WRITE" : "READ", - (unsigned long long)bio->bi_sector, - bdevname(bio->bi_bdev,b)); + /* + * If it's a regular read/write or a barrier with data attached, + * go through the normal accounting stuff before submission. + */ + if (!bio_empty_barrier(bio)) { + + BIO_BUG_ON(!bio->bi_size); + BIO_BUG_ON(!bio->bi_io_vec); + + if (rw & WRITE) { + count_vm_events(PGPGOUT, count); + } else { + task_io_account_read(bio->bi_size); + count_vm_events(PGPGIN, count); + } + + if (unlikely(block_dump)) { + char b[BDEVNAME_SIZE]; + printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", + current->comm, current->pid, + (rw & WRITE) ? "WRITE" : "READ", + (unsigned long long)bio->bi_sector, + bdevname(bio->bi_bdev,b)); + } } generic_make_request(bio); @@ -3405,6 +3417,14 @@ static int __end_that_request_first(struct request *req, int uptodate, while ((bio = req->bio) != NULL) { int nbytes; + /* + * For an empty barrier request, the low level driver must + * store a potential error location in ->sector. We pass + * that back up in ->bi_sector. + */ + if (blk_empty_barrier(req)) + bio->bi_sector = req->sector; + if (nr_bytes >= bio->bi_size) { req->bio = bio->bi_next; nbytes = bio->bi_size; diff --git a/include/linux/bio.h b/include/linux/bio.h index 089a8bc55dd42f8c60f715b24e43fe8f781de4dc..4da441337d6effd31990afc5616d7772010d0f47 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -176,13 +176,28 @@ struct bio { #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) -#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) + +static inline unsigned int bio_cur_sectors(struct bio *bio) +{ + if (bio->bi_vcnt) + return bio_iovec(bio)->bv_len >> 9; + + return 0; +} + +static inline void *bio_data(struct bio *bio) +{ + if (bio->bi_vcnt) + return page_address(bio_page(bio)) + bio_offset(bio); + + return NULL; +} /* * will die diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 610967992ddb48c24635749cd66ded0c84eceea3..fb2ff749dc1f3edc2e5aaaaf0c7abf3dab807268 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,6 +540,7 @@ enum { #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) +#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) diff --git a/mm/bounce.c b/mm/bounce.c index 3b549bf31f7dc474996bf30a0401eea82b490466..b6d2d0f1019b3975640a5b0388825b44112be7de 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -264,6 +264,12 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { mempool_t *pool; + /* + * Data-less bio, nothing to bounce + */ + if (bio_empty_barrier(*bio_orig)) + return; + /* * for non-isa bounce case, just check if the bounce pfn is equal * to or bigger than the highest pfn in the system -- in that case,