diff --git a/fs/direct-io.c b/fs/direct-io.c index 389863f59cda384ecea6aae6ffa31c206fff4bac..4a588dbd11bfa57926b647e1e66bdd35a446cad4 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -36,6 +36,7 @@ #include #include #include +#include /* * How many user pages to map in one call to get_user_pages(). This determines @@ -1087,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio) * individual fields and will generate much worse code. This is important * for the whole file. */ -ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, +static inline ssize_t +do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) @@ -1097,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, size_t size; unsigned long addr; unsigned blkbits = inode->i_blkbits; - unsigned bdev_blkbits = 0; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; loff_t end = offset; @@ -1110,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (rw & WRITE) rw = WRITE_ODIRECT; - if (bdev) - bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); + /* + * Avoid references to bdev if not absolutely needed to give + * the early prefetch in the caller enough time. + */ if (offset & blocksize_mask) { if (bdev) - blkbits = bdev_blkbits; + blkbits = blksize_bits(bdev_logical_block_size(bdev)); blocksize_mask = (1 << blkbits) - 1; if (offset & blocksize_mask) goto out; @@ -1126,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, addr = (unsigned long)iov[seg].iov_base; size = iov[seg].iov_len; end += size; - if ((addr & blocksize_mask) || (size & blocksize_mask)) { + if (unlikely((addr & blocksize_mask) || + (size & blocksize_mask))) { if (bdev) - blkbits = bdev_blkbits; + blkbits = blksize_bits( + bdev_logical_block_size(bdev)); blocksize_mask = (1 << blkbits) - 1; - if ((addr & blocksize_mask) || (size & blocksize_mask)) + if ((addr & blocksize_mask) || (size & blocksize_mask)) goto out; } } @@ -1313,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, out: return retval; } + +ssize_t +__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) +{ + /* + * The block device state is needed in the end to finally + * submit everything. Since it's likely to be cache cold + * prefetch it here as first thing to hide some of the + * latency. + * + * Attempt to prefetch the pieces we likely need later. + */ + prefetch(&bdev->bd_disk->part_tbl); + prefetch(bdev->bd_queue); + prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); + + return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, + nr_segs, get_block, end_io, + submit_io, flags); +} + EXPORT_SYMBOL(__blockdev_direct_IO); static __init int dio_init(void)