Commit ce40be7a authored by Linus Torvalds

Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block

Pull block IO update from Jens Axboe:
 "Core block IO bits for 3.7.  Not a huge round this time, it contains:

   - First series from Kent cleaning up and generalizing bio allocation
     and freeing.

   - WRITE_SAME support from Martin.

   - Mikulas patches to prevent O_DIRECT crashes when someone changes
     the block size of a device.

   - Make bio_split() work on data-less bio's (like trim/discards).

   - A few other minor fixups."

Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton.  It is due to the VM no longer using a prio-tree (see commit
6b2dbba8: "mm: replace vma prio_tree with an interval tree").

So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.

* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
  block: makes bio_split support bio without data
  scatterlist: refactor the sg_nents
  scatterlist: add sg_nents
  fs: fix include/percpu-rwsem.h export error
  percpu-rw-semaphore: fix documentation typos
  fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
  blockdev: turn a rw semaphore into a percpu rw semaphore
  Fix a crash when block device is read and block size is changed at the same time
  block: fix request_queue->flags initialization
  block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
  block: ioctl to zero block ranges
  block: Make blkdev_issue_zeroout use WRITE SAME
  block: Implement support for WRITE SAME
  block: Consolidate command flag and queue limit checks for merges
  block: Clean up special command handling logic
  block/blk-tag.c: Remove useless kfree
  block: remove the duplicated setting for congestion_threshold
  block: reject invalid queue attribute values
  block: Add bio_clone_bioset(), bio_clone_kmalloc()
  block: Consolidate bio_alloc_bioset(), bio_kmalloc()
  ...
......@@ -206,3 +206,17 @@ Description:
when a discarded area is read the discard_zeroes_data
parameter will be set to one. Otherwise it will be 0 and
the result of reading a discarded area is undefined.
What: /sys/block/<disk>/queue/write_same_max_bytes
Date: January 2012
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Some devices support a write same operation in which a
single data block can be written to a range of several
contiguous blocks on storage. This can be used to wipe
areas on disk or to initialize drives in a RAID
configuration. write_same_max_bytes indicates how many
bytes can be written in a single write same command. If
write_same_max_bytes is 0, write same is not supported
by the device.
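As a quick illustration (not part of this patch), a userspace program could
read this attribute to decide whether WRITE SAME is usable; the disk name
"sda" and the helper program are purely an example sketch:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long max_bytes;
		/* "sda" is an example disk name */
		FILE *f = fopen("/sys/block/sda/queue/write_same_max_bytes", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%llu", &max_bytes) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);

		if (max_bytes == 0)
			printf("WRITE SAME not supported\n");
		else
			printf("WRITE SAME limit: %llu bytes per command\n", max_bytes);
		return 0;
	}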
......@@ -465,7 +465,6 @@ struct bio {
bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
atomic_t bi_cnt; /* pin count: free when it hits zero */
void *bi_private;
bio_destructor_t *bi_destructor; /* bi_destructor (bio) */
};
With this multipage bio design:
......@@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs,
so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
given size from these slabs.
The bi_destructor() routine takes into account the possibility of the bio
having originated from a different source (see later discussions on
n/w to block transfers and kvec_cb)
The bio_get() routine may be used to hold an extra reference on a bio prior
to i/o submission, if the bio fields are likely to be accessed after the
i/o is issued (since the bio may otherwise get freed in case i/o completion
......
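A minimal sketch of the bio_get()/bio_put() pattern mentioned above; the
function names my_submit/my_end_io and the READ direction are illustrative
only, not taken from this patch:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	static void my_end_io(struct bio *bio, int error)
	{
		/* completion may run before my_submit() returns */
		bio_put(bio);		/* drops the allocator's reference */
	}

	static void my_submit(struct bio *bio)
	{
		bio->bi_end_io = my_end_io;

		bio_get(bio);		/* extra ref: bio stays valid below */
		submit_bio(READ, bio);

		/* bio fields may still be inspected here, even if the
		 * I/O has already completed */
		pr_debug("submitted %u sectors\n", bio_sectors(bio));

		bio_put(bio);		/* release the extra reference */
	}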
Percpu rw semaphores
--------------------
Percpu rw semaphores are a new read-write semaphore design that is
optimized for locking for reading.
The problem with traditional read-write semaphores is that when multiple
cores take the lock for reading, the cache line containing the semaphore
is bouncing between L1 caches of the cores, causing performance
degradation.
Locking for reading is very fast: it uses RCU and avoids any atomic
instruction in the lock and unlock path. On the other hand, locking for
writing is very expensive: it calls synchronize_rcu(), which can take
hundreds of milliseconds.
The lock is declared as a "struct percpu_rw_semaphore".
The lock is initialized with percpu_init_rwsem, which returns 0 on success
and -ENOMEM on allocation failure.
The lock must be freed with percpu_free_rwsem to avoid a memory leak.
The lock is taken for reading with percpu_down_read and released with
percpu_up_read; for writing, use percpu_down_write and percpu_up_write.
The idea of using RCU for an optimized rw-lock was introduced by
Eric Dumazet <eric.dumazet@gmail.com>.
The code was written by Mikulas Patocka <mpatocka@redhat.com>.
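A minimal usage sketch of the API described above, assuming a module-local
semaphore; my_sem and the helper functions are made-up names:

	#include <linux/percpu-rwsem.h>

	static struct percpu_rw_semaphore my_sem;

	static int my_init(void)
	{
		return percpu_init_rwsem(&my_sem);	/* 0 or -ENOMEM */
	}

	static void my_read_side(void)
	{
		percpu_down_read(&my_sem);	/* fast path: RCU, no atomics */
		/* ... read shared state ... */
		percpu_up_read(&my_sem);
	}

	static void my_write_side(void)
	{
		percpu_down_write(&my_sem);	/* slow: uses synchronize_rcu() */
		/* ... modify shared state ... */
		percpu_up_write(&my_sem);
	}

	static void my_exit(void)
	{
		percpu_free_rwsem(&my_sem);	/* required to avoid a leak */
	}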
......@@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
/*
* A queue starts its life with bypass turned on to avoid
* unnecessary bypass on/off overhead and nasty surprises during
* init. The initial bypass will be finished at the end of
* blk_init_allocated_queue().
* init. The initial bypass will be finished when the queue is
* registered by blk_register_queue().
*/
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
......@@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
q->request_fn = rfn;
q->prep_rq_fn = NULL;
q->unprep_rq_fn = NULL;
q->queue_flags = QUEUE_FLAG_DEFAULT;
q->queue_flags |= QUEUE_FLAG_DEFAULT;
/* Override internal queue lock with supplied lock pointer */
if (lock)
......@@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
/* init elevator */
if (elevator_init(q, NULL))
return NULL;
blk_queue_congestion_threshold(q);
/* all done, end the initial bypass */
blk_queue_bypass_end(q);
return q;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
......@@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
nr_sectors > queue_max_hw_sectors(q))) {
if (likely(bio_is_rw(bio) &&
nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
......@@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_rw & REQ_DISCARD) &&
(!blk_queue_discard(q) ||
((bio->bi_rw & REQ_SECURE) &&
!blk_queue_secdiscard(q)))) {
((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
err = -EOPNOTSUPP;
goto end_io;
}
if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
err = -EOPNOTSUPP;
goto end_io;
}
......@@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);
bio->bi_rw |= rw;
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
if (bio_has_data(bio)) {
unsigned int count;
if (unlikely(rw & REQ_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
......@@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio);
*/
int blk_rq_check_limits(struct request_queue *q, struct request *rq)
{
if (rq->cmd_flags & REQ_DISCARD)
if (!rq_mergeable(rq))
return 0;
if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
}
......@@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
req->buffer = bio_data(req->bio);
/* update sector only for requests with clear definition of sector */
if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
if (req->cmd_type == REQ_TYPE_FS)
req->__sector += total_bytes >> 9;
/* mixed attributes always follow the first bio */
......@@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
blk_rq_init(NULL, rq);
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
bio = bio_clone_bioset(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;
__bio_clone(bio, bio_src);
if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
......@@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
free_and_out:
if (bio)
bio_free(bio, bs);
bio_put(bio);
blk_rq_unprep_clone(rq);
return -ENOMEM;
......
......@@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
/**
* blkdev_issue_write_same - queue a write same operation
* @bdev: target blockdev
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @page: page containing data to write
*
* Description:
* Issue a write same request for the sectors in question.
*/
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask,
struct page *page)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_write_same_sectors;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
if (!q)
return -ENXIO;
max_write_same_sectors = q->limits.max_write_same_sectors;
if (max_write_same_sectors == 0)
return -EOPNOTSUPP;
atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
while (nr_sects) {
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
ret = -ENOMEM;
break;
}
bio->bi_sector = sector;
bio->bi_end_io = bio_batch_end_io;
bio->bi_bdev = bdev;
bio->bi_private = &bb;
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
if (nr_sects > max_write_same_sectors) {
bio->bi_size = max_write_same_sectors << 9;
nr_sects -= max_write_same_sectors;
sector += max_write_same_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
atomic_inc(&bb.done);
submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
}
/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
ret = -ENOTSUPP;
return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
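A possible caller (not part of this patch) could use the new helper to wipe a
sector range when the device advertises WRITE SAME support, much like
blkdev_issue_zeroout() does further down; my_wipe_range is a made-up name:

	#include <linux/blkdev.h>
	#include <linux/mm.h>

	static int my_wipe_range(struct block_device *bdev, sector_t start,
				 sector_t nr_sects)
	{
		if (!bdev_write_same(bdev))
			return -EOPNOTSUPP;

		/* ZERO_PAGE(0) supplies the single block to be replicated */
		return blkdev_issue_write_same(bdev, start, nr_sects,
					       GFP_KERNEL, ZERO_PAGE(0));
	}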
/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
......@@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard);
* Generate and issue number of bios with zerofiled pages.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
int ret;
......@@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
/**
* blkdev_issue_zeroout - zero-fill a block range
* @bdev: blockdev to write
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
*
* Description:
* Generate and issue number of bios with zerofiled pages.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
if (bdev_write_same(bdev)) {
unsigned char bdn[BDEVNAME_SIZE];
if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
ZERO_PAGE(0)))
return 0;
bdevname(bdev, bdn);
pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
}
return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
......@@ -275,14 +275,8 @@ static inline int ll_new_hw_segment(struct request_queue *q,
int ll_back_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
max_sectors = queue_max_sectors(q);
if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
......@@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
int ll_front_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
max_sectors = queue_max_sectors(q);
if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
......@@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
/*
* Will it become too large?
*/
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
blk_rq_get_max_sectors(req))
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
......@@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (!rq_mergeable(req) || !rq_mergeable(next))
return 0;
/*
* Don't merge file system requests and discard requests
*/
if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
return 0;
/*
* Don't merge discard requests and secure discard requests
*/
if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
return 0;
/*
......@@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
if (req->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(req->bio, next->bio))
return 0;
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
......@@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!rq_mergeable(rq))
if (!rq_mergeable(rq) || !bio_mergeable(bio))
return false;
/* don't merge file system requests and discard requests */
if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
return false;
/* don't merge discard requests and secure discard requests */
if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
return false;
/* different data direction or already started, don't merge */
......@@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
/* must be using the same buffer */
if (rq->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
return false;
return true;
}
......
......@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
......@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
......@@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
* blk_queue_max_write_same_sectors - set max sectors for a single write same
* @q: the request queue for the device
* @max_write_same_sectors: maximum number of sectors to write per command
**/
void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors)
{
q->limits.max_write_same_sectors = max_write_same_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
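A driver would typically call this from its queue setup path, as the raid0
change later in this series does; the sketch below is illustrative only and
my_setup_queue is a made-up name:

	#include <linux/blkdev.h>

	static void my_setup_queue(struct request_queue *q,
				   unsigned int chunk_sectors)
	{
		blk_queue_max_hw_sectors(q, chunk_sectors);
		/* leaving this at 0 (the default) reports "not supported" */
		blk_queue_max_write_same_sectors(q, chunk_sectors);
	}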
/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
......@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
......
......@@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page)
static ssize_t
queue_var_store(unsigned long *var, const char *page, size_t count)
{
char *p = (char *) page;
int err;
unsigned long v;
err = strict_strtoul(page, 10, &v);
if (err || v > UINT_MAX)
return -EINVAL;
*var = v;
*var = simple_strtoul(p, &p, 10);
return count;
}
......@@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
return -EINVAL;
ret = queue_var_store(&nr, page, count);
if (ret < 0)
return ret;
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
......@@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
unsigned long ra_kb;
ssize_t ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
return ret;
......@@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)q->limits.max_write_same_sectors << 9);
}
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
......@@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
if (ret < 0)
return ret;
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
......@@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
unsigned long nm;
ssize_t ret = queue_var_store(&nm, page, count);
if (ret < 0)
return ret;
spin_lock_irq(q->queue_lock);
queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
......@@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
unsigned long val;
ret = queue_var_store(&val, page, count);
if (ret < 0)
return ret;
spin_lock_irq(q->queue_lock);
if (val == 2) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
......@@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
......@@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
......@@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk)
if (WARN_ON(!q))
return -ENXIO;
/*
* Initialization must be complete by now. Finish the initial
* bypass from queue allocation.
*/
blk_queue_bypass_end(q);
ret = blk_trace_init_sysfs(dev);
if (ret)
return ret;
......
......@@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
tags = __blk_queue_init_tags(q, depth);
if (!tags)
goto fail;
return -ENOMEM;
} else if (q->queue_tags) {
rc = blk_queue_resize_tags(q, depth);
if (rc)
......@@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
INIT_LIST_HEAD(&q->tag_busy_list);
return 0;
fail:
kfree(tags);
return -ENOMEM;
}
EXPORT_SYMBOL(blk_queue_init_tags);
......
......@@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request or a discard request
* c) it's a file system request
*/
static inline int blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->cmd_flags & REQ_IO_STAT) &&
(rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD));
(rq->cmd_type == REQ_TYPE_FS);
}
/*
......
......@@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD)) {
if (rq->cmd_type == REQ_TYPE_FS) {
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
......@@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (elv_attempt_insert_merge(q, rq))
break;
case ELEVATOR_INSERT_SORT:
BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
!(rq->cmd_flags & REQ_DISCARD));
BUG_ON(rq->cmd_type != REQ_TYPE_FS);
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) {
......
......@@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}
static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
uint64_t len)
{
if (start & 511)
return -EINVAL;
if (len & 511)
return -EINVAL;
start >>= 9;
len >>= 9;
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
return -EINVAL;
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
}
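From userspace the new ioctl takes a {start, length} pair in bytes, both
512-byte aligned. A hedged sketch, assuming the uapi <linux/fs.h> in use
defines BLKZEROOUT and using "/dev/sdX" as a placeholder device name:

	#include <stdint.h>
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>

	int main(void)
	{
		/* zero 1 MiB starting at byte offset 4096 */
		uint64_t range[2] = { 4096, 1024 * 1024 };
		int fd = open("/dev/sdX", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (ioctl(fd, BLKZEROOUT, range) < 0) {
			perror("BLKZEROOUT");
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}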
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
......@@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return blk_ioctl_discard(bdev, range[0], range[1],
cmd == BLKSECDISCARD);
}
case BLKZEROOUT: {
uint64_t range[2];
if (!(mode & FMODE_WRITE))
return -EBADF;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
return blk_ioctl_zeroout(bdev, range[0], range[1]);
}
case HDIO_GETGEO: {
struct hd_geometry geo;
......
......@@ -162,23 +162,12 @@ static const struct block_device_operations drbd_ops = {
.release = drbd_release,
};
static void bio_destructor_drbd(struct bio *bio)
{
bio_free(bio, drbd_md_io_bio_set);
}
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
struct bio *bio;
if (!drbd_md_io_bio_set)
return bio_alloc(gfp_mask, 1);
bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
if (!bio)
return NULL;
bio->bi_destructor = bio_destructor_drbd;
return bio;
return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
}
#ifdef __CHECKER__
......
......@@ -266,11 +266,10 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
struct bio *tmp, *new_chain = NULL, *tail = NULL;
while (old_chain) {
tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
tmp = bio_clone_kmalloc(old_chain, gfpmask);
if (!tmp)
goto err_out;
__bio_clone(tmp, old_chain);
tmp->bi_bdev = NULL;
gfpmask &= ~__GFP_WAIT;
tmp->bi_next = NULL;
......
......@@ -522,38 +522,6 @@ static void pkt_bio_finished(struct pktcdvd_device *pd)
}
}
static void pkt_bio_destructor(struct bio *bio)
{
kfree(bio->bi_io_vec);
kfree(bio);
}
static struct bio *pkt_bio_alloc(int nr_iovecs)
{
struct bio_vec *bvl = NULL;
struct bio *bio;
bio = kmalloc(sizeof(struct bio), GFP_KERNEL);
if (!bio)
goto no_bio;
bio_init(bio);
bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL);
if (!bvl)
goto no_bvl;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
bio->bi_destructor = pkt_bio_destructor;
return bio;
no_bvl:
kfree(bio);
no_bio:
return NULL;
}
/*
* Allocate a packet_data struct
*/
......@@ -567,7 +535,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
goto no_pkt;
pkt->frames = frames;
pkt->w_bio = pkt_bio_alloc(frames);
pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
if (!pkt->w_bio)
goto no_bio;
......@@ -581,9 +549,10 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
bio_list_init(&pkt->orig_bios);
for (i = 0; i < frames; i++) {
struct bio *bio = pkt_bio_alloc(1);
struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
if (!bio)
goto no_rd_bio;
pkt->r_bios[i] = bio;
}
......@@ -1111,21 +1080,17 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
* Schedule reads for missing parts of the packet.
*/
for (f = 0; f < pkt->frames; f++) {
struct bio_vec *vec;
int p, offset;
if (written[f])
continue;
bio = pkt->r_bios[f];
vec = bio->bi_io_vec;
bio_init(bio);
bio->bi_max_vecs = 1;
bio_reset(bio);
bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio->bi_bdev = pd->bdev;
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
bio->bi_io_vec = vec;
bio->bi_destructor = pkt_bio_destructor;
p = (f * CD_FRAMESIZE) / PAGE_SIZE;
offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
......@@ -1418,14 +1383,11 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
}
/* Start the write request */
bio_init(pkt->w_bio);
pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE;
bio_reset(pkt->w_bio);
pkt->w_bio->bi_sector = pkt->sector;
pkt->w_bio->bi_bdev = pd->bdev;
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
pkt->w_bio->bi_io_vec = bvec;
pkt->w_bio->bi_destructor = pkt_bio_destructor;
for (f = 0; f < pkt->frames; f++)
if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
BUG();
......
......@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations raw_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.write = do_sync_write,
.aio_write = blkdev_aio_write,
.fsync = blkdev_fsync,
......
......@@ -798,14 +798,6 @@ static int crypt_convert(struct crypt_config *cc,
return 0;
}
static void dm_crypt_bio_destructor(struct bio *bio)
{
struct dm_crypt_io *io = bio->bi_private;
struct crypt_config *cc = io->cc;
bio_free(bio, cc->bs);
}
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations
......@@ -974,7 +966,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
clone->bi_end_io = crypt_endio;
clone->bi_bdev = cc->dev->bdev;
clone->bi_rw = io->base_bio->bi_rw;
clone->bi_destructor = dm_crypt_bio_destructor;
}
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
......@@ -988,19 +979,14 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
* copy the required bvecs because we need the original
* one in order to decrypt the whole bio data *afterwards*.
*/
clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
clone = bio_clone_bioset(base_bio, gfp, cc->bs);
if (!clone)
return 1;
crypt_inc_pending(io);
clone_init(io, clone);
clone->bi_idx = 0;
clone->bi_vcnt = bio_segments(base_bio);
clone->bi_size = base_bio->bi_size;
clone->bi_sector = cc->start + io->sector;
memcpy(clone->bi_io_vec, bio_iovec(base_bio),
sizeof(struct bio_vec) * clone->bi_vcnt);
generic_make_request(clone);
return 0;
......
......@@ -249,16 +249,6 @@ static void vm_dp_init(struct dpages *dp, void *data)
dp->context_ptr = data;
}
static void dm_bio_destructor(struct bio *bio)
{
unsigned region;
struct io *io;
retrieve_io_and_region_from_bio(bio, &io, &region);
bio_free(bio, io->client->bios);
}
/*
* Functions for getting the pages from kernel memory.
*/
......@@ -317,7 +307,6 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
bio->bi_destructor = dm_bio_destructor;
store_io_and_region_in_bio(bio, io, region);
if (rw & REQ_DISCARD) {
......
......@@ -86,12 +86,17 @@ struct dm_rq_target_io {
};
/*
* For request-based dm.
* One of these is allocated per bio.
* For request-based dm - the bio clones we allocate are embedded in these
* structs.
*
* We allocate these with bio_alloc_bioset, using the front_pad parameter when
* the bioset is created - this means the bio has to come at the end of the
* struct.
*/
struct dm_rq_clone_bio_info {
struct bio *orig;
struct dm_rq_target_io *tio;
struct bio clone;
};
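A generic sketch of this front_pad idiom (names are made up, not from the
patch): the per-bio state sits in front of the embedded bio inside the
bioset's mempool element, so no separate allocation or destructor is needed:

	#include <linux/bio.h>
	#include <linux/stddef.h>

	struct my_bio_ctx {
		void		*cookie;	/* driver-private data */
		struct bio	bio;		/* must be the last member */
	};

	static struct bio_set *my_bs;

	static int my_init(void)
	{
		my_bs = bioset_create(64, offsetof(struct my_bio_ctx, bio));
		return my_bs ? 0 : -ENOMEM;
	}

	static struct my_bio_ctx *my_alloc(void)
	{
		struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, my_bs);

		if (!bio)
			return NULL;
		/* recover the containing context from the embedded bio */
		return container_of(bio, struct my_bio_ctx, bio);
	}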
union map_info *dm_get_mapinfo(struct bio *bio)
......@@ -211,6 +216,11 @@ struct dm_md_mempools {
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
static struct kmem_cache *_rq_tio_cache;
/*
* Unused now, and needs to be deleted. But since io_pool is overloaded and it's
* still used for _io_cache, I'm leaving this for a later cleanup
*/
static struct kmem_cache *_rq_bio_info_cache;
static int __init local_init(void)
......@@ -467,16 +477,6 @@ static void free_rq_tio(struct dm_rq_target_io *tio)
mempool_free(tio, tio->md->tio_pool);
}
static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
{
return mempool_alloc(md->io_pool, GFP_ATOMIC);
}
static void free_bio_info(struct dm_rq_clone_bio_info *info)
{
mempool_free(info, info->tio->md->io_pool);
}
static int md_in_flight(struct mapped_device *md)
{
return atomic_read(&md->pending[READ]) +
......@@ -681,11 +681,6 @@ static void clone_endio(struct bio *bio, int error)
}
}
/*
* Store md for cleanup instead of tio which is about to get freed.
*/
bio->bi_private = md->bs;
free_tio(md, tio);
bio_put(bio);
dec_pending(io, error);
......@@ -1036,11 +1031,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
/* error the io and bail out, or requeue it if needed */
md = tio->io->md;
dec_pending(tio->io, r);
/*
* Store bio_set for cleanup.
*/
clone->bi_end_io = NULL;
clone->bi_private = md->bs;
bio_put(clone);
free_tio(md, tio);
} else if (r) {
......@@ -1059,13 +1049,6 @@ struct clone_info {
unsigned short idx;
};
static void dm_bio_destructor(struct bio *bio)
{
struct bio_set *bs = bio->bi_private;
bio_free(bio, bs);
}
/*
* Creates a little bio that just does part of a bvec.
*/
......@@ -1077,7 +1060,6 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
struct bio_vec *bv = bio->bi_io_vec + idx;
clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
clone->bi_destructor = dm_bio_destructor;
*clone->bi_io_vec = *bv;
clone->bi_sector = sector;
......@@ -1090,7 +1072,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
clone->bi_flags |= 1 << BIO_CLONED;
if (bio_integrity(bio)) {
bio_integrity_clone(clone, bio, GFP_NOIO, bs);
bio_integrity_clone(clone, bio, GFP_NOIO);
bio_integrity_trim(clone,
bio_sector_offset(bio, idx, offset), len);
}
......@@ -1109,7 +1091,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
__bio_clone(clone, bio);
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
clone->bi_idx = idx;
clone->bi_vcnt = idx + bv_count;
......@@ -1117,7 +1098,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone->bi_flags &= ~(1 << BIO_SEG_VALID);
if (bio_integrity(bio)) {
bio_integrity_clone(clone, bio, GFP_NOIO, bs);
bio_integrity_clone(clone, bio, GFP_NOIO);
if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
bio_integrity_trim(clone,
......@@ -1152,9 +1133,8 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
* ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
* and discard, so no need for concern about wasted bvec allocations.
*/
clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs);
__bio_clone(clone, ci->bio);
clone->bi_destructor = dm_bio_destructor;
clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs);
if (len) {
clone->bi_sector = ci->sector;
clone->bi_size = to_bytes(len);
......@@ -1484,30 +1464,17 @@ void dm_dispatch_request(struct request *rq)
}
EXPORT_SYMBOL_GPL(dm_dispatch_request);
static void dm_rq_bio_destructor(struct bio *bio)
{
struct dm_rq_clone_bio_info *info = bio->bi_private;
struct mapped_device *md = info->tio->md;
free_bio_info(info);
bio_free(bio, md->bs);
}
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
void *data)
{
struct dm_rq_target_io *tio = data;
struct mapped_device *md = tio->md;
struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
if (!info)
return -ENOMEM;
struct dm_rq_clone_bio_info *info =
container_of(bio, struct dm_rq_clone_bio_info, clone);
info->orig = bio_orig;
info->tio = tio;
bio->bi_end_io = end_clone_bio;
bio->bi_private = info;
bio->bi_destructor = dm_rq_bio_destructor;
return 0;
}
......@@ -2771,7 +2738,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
if (!pools->tio_pool)
goto free_io_pool_and_out;
pools->bs = bioset_create(pool_size, 0);
pools->bs = (type == DM_TYPE_BIO_BASED) ?
bioset_create(pool_size, 0) :
bioset_create(pool_size,
offsetof(struct dm_rq_clone_bio_info, clone));
if (!pools->bs)
goto free_tio_pool_and_out;
......
......@@ -155,32 +155,17 @@ static int start_readonly;
* like bio_clone, but with a local bio set
*/
static void mddev_bio_destructor(struct bio *bio)
{
struct mddev *mddev, **mddevp;
mddevp = (void*)bio;
mddev = mddevp[-1];
bio_free(bio, mddev->bio_set);
}
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev)
{
struct bio *b;
struct mddev **mddevp;
if (!mddev || !mddev->bio_set)
return bio_alloc(gfp_mask, nr_iovecs);
b = bio_alloc_bioset(gfp_mask, nr_iovecs,
mddev->bio_set);
b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
if (!b)
return NULL;
mddevp = (void*)b;
mddevp[-1] = mddev;
b->bi_destructor = mddev_bio_destructor;
return b;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
......@@ -188,32 +173,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev);
struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
struct mddev *mddev)
{
struct bio *b;
struct mddev **mddevp;
if (!mddev || !mddev->bio_set)
return bio_clone(bio, gfp_mask);
b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
mddev->bio_set);
if (!b)
return NULL;
mddevp = (void*)b;
mddevp[-1] = mddev;
b->bi_destructor = mddev_bio_destructor;
__bio_clone(b, bio);
if (bio_integrity(bio)) {
int ret;
ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
if (ret < 0) {
bio_put(b);
return NULL;
}
}
return b;
return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);
......@@ -5006,8 +4969,7 @@ int md_run(struct mddev *mddev)
}
if (mddev->bio_set == NULL)
mddev->bio_set = bioset_create(BIO_POOL_SIZE,
sizeof(struct mddev *));
mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
......
......@@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
......
......@@ -553,14 +553,6 @@ static void iblock_complete_cmd(struct se_cmd *cmd)
kfree(ibr);
}
static void iblock_bio_destructor(struct bio *bio)
{
struct se_cmd *cmd = bio->bi_private;
struct iblock_dev *ib_dev = cmd->se_dev->dev_ptr;
bio_free(bio, ib_dev->ibd_bio_set);
}
static struct bio *
iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num)
{
......@@ -582,7 +574,6 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num)
bio->bi_bdev = ib_dev->ibd_bd;
bio->bi_private = cmd;
bio->bi_destructor = iblock_bio_destructor;
bio->bi_end_io = &iblock_bio_done;
bio->bi_sector = lba;
return bio;
......
......@@ -70,23 +70,25 @@ static inline int use_bip_pool(unsigned int idx)
}
/**
* bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
* bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
* @gfp_mask: Memory allocation mask
* @nr_vecs: Number of integrity metadata scatter-gather elements
* @bs: bio_set to allocate from
*
* Description: This function prepares a bio for attaching integrity
* metadata. nr_vecs specifies the maximum number of pages containing
* integrity metadata that can be attached.
*/
struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
gfp_t gfp_mask,
unsigned int nr_vecs,
struct bio_set *bs)
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
gfp_t gfp_mask,
unsigned int nr_vecs)
{
struct bio_integrity_payload *bip;
unsigned int idx = vecs_to_idx(nr_vecs);
struct bio_set *bs = bio->bi_pool;
if (!bs)
bs = fs_bio_set;
BUG_ON(bio == NULL);
bip = NULL;
......@@ -114,37 +116,22 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
return bip;
}
EXPORT_SYMBOL(bio_integrity_alloc_bioset);
/**
* bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
* @gfp_mask: Memory allocation mask
* @nr_vecs: Number of integrity metadata scatter-gather elements
*
* Description: This function prepares a bio for attaching integrity
* metadata. nr_vecs specifies the maximum number of pages containing
* integrity metadata that can be attached.
*/
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
gfp_t gfp_mask,
unsigned int nr_vecs)
{
return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
}
EXPORT_SYMBOL(bio_integrity_alloc);
/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
* @bs: bio_set this bio was allocated from
*
* Description: Used to free the integrity portion of a bio. Usually
* called from bio_free().
*/
void bio_integrity_free(struct bio *bio, struct bio_set *bs)
void bio_integrity_free(struct bio *bio)
{
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_set *bs = bio->bi_pool;
if (!bs)
bs = fs_bio_set;
BUG_ON(bip == NULL);
......@@ -730,19 +717,18 @@ EXPORT_SYMBOL(bio_integrity_split);
* @bio: New bio
* @bio_src: Original bio
* @gfp_mask: Memory allocation mask
* @bs: bio_set to allocate bip from
*
* Description: Called to allocate a bip when cloning a bio
*/
int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
gfp_t gfp_mask, struct bio_set *bs)
gfp_t gfp_mask)
{
struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
struct bio_integrity_payload *bip;
BUG_ON(bip_src == NULL);
bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
if (bip == NULL)
return -EIO;
......
......@@ -55,6 +55,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
* IO code that does not need private memory pools.
*/
struct bio_set *fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);
/*
* Our slab pool management
......@@ -233,26 +234,37 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
return bvl;
}
void bio_free(struct bio *bio, struct bio_set *bs)
static void __bio_free(struct bio *bio)
{
bio_disassociate_task(bio);
if (bio_integrity(bio))
bio_integrity_free(bio);
}
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
if (bio_has_allocated_vec(bio))
bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
__bio_free(bio);
if (bio_integrity(bio))
bio_integrity_free(bio, bs);
if (bs) {
if (bio_has_allocated_vec(bio))
bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
/*
* If we have front padding, adjust the bio pointer before freeing
*/
p = bio;
if (bs->front_pad)
/*
* If we have front padding, adjust the bio pointer before freeing
*/
p = bio;
p -= bs->front_pad;
mempool_free(p, bs->bio_pool);
mempool_free(p, bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
}
}
EXPORT_SYMBOL(bio_free);
void bio_init(struct bio *bio)
{
......@@ -262,6 +274,27 @@ void bio_init(struct bio *bio)
}
EXPORT_SYMBOL(bio_init);
/**
* bio_reset - reinitialize a bio
* @bio: bio to reset
*
* Description:
* After calling bio_reset(), @bio will be in the same state as a freshly
* allocated bio returned by bio_alloc_bioset() - the only fields that are
* preserved are the ones that are initialized by bio_alloc_bioset(). See
* comment in struct bio.
*/
void bio_reset(struct bio *bio)
{
unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
__bio_free(bio);
memset(bio, 0, BIO_RESET_BYTES);
bio->bi_flags = flags|(1 << BIO_UPTODATE);
}
EXPORT_SYMBOL(bio_reset);
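A hedged sketch of the reuse pattern bio_reset() enables, in the spirit of
the pktcdvd conversion elsewhere in this series; my_resubmit and my_end_io
are made-up names. The biovec array (bi_io_vec/bi_max_vecs) survives the
reset, so only the zeroed fields need to be filled in again:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	static void my_end_io(struct bio *bio, int error)
	{
		/* hypothetical completion handler; the owner keeps the
		 * bio around for the next reuse */
	}

	static void my_resubmit(struct bio *bio, struct block_device *bdev,
				sector_t sector, struct page *page)
	{
		bio_reset(bio);			/* back to freshly-allocated state */

		bio->bi_bdev = bdev;		/* set before bio_add_page() */
		bio->bi_sector = sector;
		bio->bi_end_io = my_end_io;	/* bi_end_io is cleared by the reset */

		if (!bio_add_page(bio, page, PAGE_SIZE, 0))
			return;			/* queue refused the page */

		submit_bio(READ, bio);
	}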
/**
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
......@@ -269,42 +302,58 @@ EXPORT_SYMBOL(bio_init);
* @bs: the bio_set to allocate from.
*
* Description:
* bio_alloc_bioset will try its own mempool to satisfy the allocation.
* If %__GFP_WAIT is set then we will block on the internal pool waiting
* for a &struct bio to become free.
* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
* backed by the @bs's mempool.
*
* Note that the caller must set ->bi_destructor on successful return
* of a bio, to do the appropriate freeing of the bio once the reference
* count drops to zero.
**/
* When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
* able to allocate a bio. This is due to the mempool guarantees. To make this
* work, callers must never allocate more than 1 bio at a time from this pool.
* Callers that need to allocate more than 1 bio must always submit the
* previously allocated bio for IO before attempting to allocate a new one.
* Failure to do so can cause deadlocks under memory pressure.
*
* RETURNS:
* Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
unsigned front_pad;
unsigned inline_vecs;
unsigned long idx = BIO_POOL_NONE;
struct bio_vec *bvl = NULL;
struct bio *bio;
void *p;
p = mempool_alloc(bs->bio_pool, gfp_mask);
if (!bs) {
if (nr_iovecs > UIO_MAXIOV)
return NULL;
p = kmalloc(sizeof(struct bio) +
nr_iovecs * sizeof(struct bio_vec),
gfp_mask);
front_pad = 0;
inline_vecs = nr_iovecs;
} else {
p = mempool_alloc(bs->bio_pool, gfp_mask);
front_pad = bs->front_pad;
inline_vecs = BIO_INLINE_VECS;
}
if (unlikely(!p))
return NULL;
bio = p + bs->front_pad;
bio = p + front_pad;
bio_init(bio);
if (unlikely(!nr_iovecs))
goto out_set;
if (nr_iovecs <= BIO_INLINE_VECS) {
bvl = bio->bi_inline_vecs;
nr_iovecs = BIO_INLINE_VECS;
} else {
if (nr_iovecs > inline_vecs) {
bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
if (unlikely(!bvl))
goto err_free;
nr_iovecs = bvec_nr_vecs(idx);
} else if (nr_iovecs) {
bvl = bio->bi_inline_vecs;
}
out_set:
bio->bi_pool = bs;
bio->bi_flags |= idx << BIO_POOL_OFFSET;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
......@@ -316,80 +365,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_alloc_bioset);
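A hedged sketch of the allocation rule spelled out in the comment above: when
allocating from a bio_set with __GFP_WAIT set (as GFP_NOIO is), submit each
bio before allocating the next one so the mempool guarantee holds. The helper
names are made up:

	#include <linux/bio.h>
	#include <linux/blkdev.h>
	#include <linux/mm.h>

	static void my_end_io(struct bio *bio, int error)
	{
		bio_put(bio);
	}

	static void my_write_pages(struct bio_set *bs, struct block_device *bdev,
				   struct page **pages, unsigned int nr,
				   sector_t sector)
	{
		unsigned int i;

		for (i = 0; i < nr; i++) {
			/* may sleep, but will not fail thanks to the mempool */
			struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, bs);

			bio->bi_bdev = bdev;
			bio->bi_sector = sector + (i << (PAGE_SHIFT - 9));
			bio->bi_end_io = my_end_io;
			bio_add_page(bio, pages[i], PAGE_SIZE, 0);

			/* submit before looping: never hold two unsubmitted
			 * bios from the same pool at once */
			submit_bio(WRITE, bio);
		}
	}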
static void bio_fs_destructor(struct bio *bio)
{
bio_free(bio, fs_bio_set);
}
/**
* bio_alloc - allocate a new bio, memory pool backed
* @gfp_mask: allocation mask to use
* @nr_iovecs: number of iovecs
*
* bio_alloc will allocate a bio and associated bio_vec array that can hold
* at least @nr_iovecs entries. Allocations will be done from the
* fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc.
*
* If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
* a bio. This is due to the mempool guarantees. To make this work, callers
* must never allocate more than 1 bio at a time from this pool. Callers
* that need to allocate more than 1 bio must always submit the previously
* allocated bio for IO before attempting to allocate a new one. Failure to
* do so can cause livelocks under memory pressure.
*
* RETURNS:
* Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
if (bio)
bio->bi_destructor = bio_fs_destructor;
return bio;
}
EXPORT_SYMBOL(bio_alloc);
static void bio_kmalloc_destructor(struct bio *bio)
{
if (bio_integrity(bio))
bio_integrity_free(bio, fs_bio_set);
kfree(bio);
}
/**
* bio_kmalloc - allocate a bio for I/O using kmalloc()
* @gfp_mask: the GFP_ mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
*
* Description:
* Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains
* %__GFP_WAIT, the allocation is guaranteed to succeed.
*
**/
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
struct bio *bio;
if (nr_iovecs > UIO_MAXIOV)
return NULL;
bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
gfp_mask);
if (unlikely(!bio))
return NULL;
bio_init(bio);
bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bio->bi_inline_vecs;
bio->bi_destructor = bio_kmalloc_destructor;
return bio;
}
EXPORT_SYMBOL(bio_kmalloc);
void zero_fill_bio(struct bio *bio)
{
unsigned long flags;
......@@ -420,11 +395,8 @@ void bio_put(struct bio *bio)
/*
* last put frees it
*/
if (atomic_dec_and_test(&bio->bi_cnt)) {
bio_disassociate_task(bio);
bio->bi_next = NULL;
bio->bi_destructor(bio);
}
if (atomic_dec_and_test(&bio->bi_cnt))
bio_free(bio);
}
EXPORT_SYMBOL(bio_put);
......@@ -466,26 +438,28 @@ void __bio_clone(struct bio *bio, struct bio *bio_src)
EXPORT_SYMBOL(__bio_clone);
/**
* bio_clone - clone a bio
* bio_clone_bioset - clone a bio
* @bio: bio to clone
* @gfp_mask: allocation priority
* @bs: bio_set to allocate from
*
* Like __bio_clone, only also allocates the returned bio
*/
struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
struct bio_set *bs)
{
struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
struct bio *b;
b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs);
if (!b)
return NULL;
b->bi_destructor = bio_fs_destructor;
__bio_clone(b, bio);
if (bio_integrity(bio)) {
int ret;
ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
ret = bio_integrity_clone(b, bio, gfp_mask);
if (ret < 0) {
bio_put(b);
......@@ -495,7 +469,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
return b;
}
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_clone_bioset);
/**
* bio_get_nr_vecs - return approx number of vecs
......@@ -1501,7 +1475,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
bi->bi_sector + first_sectors);
BUG_ON(bi->bi_vcnt != 1);
BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
BUG_ON(bi->bi_idx != 0);
atomic_set(&bp->cnt, 3);
bp->error = 0;
......@@ -1511,17 +1485,22 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
bp->bio2.bi_size -= first_sectors << 9;
bp->bio1.bi_size = first_sectors << 9;
bp->bv1 = bi->bi_io_vec[0];
bp->bv2 = bi->bi_io_vec[0];
bp->bv2.bv_offset += first_sectors << 9;
bp->bv2.bv_len -= first_sectors << 9;
bp->bv1.bv_len = first_sectors << 9;
if (bi->bi_vcnt != 0) {
bp->bv1 = bi->bi_io_vec[0];
bp->bv2 = bi->bi_io_vec[0];
if (bio_is_rw(bi)) {
bp->bv2.bv_offset += first_sectors << 9;
bp->bv2.bv_len -= first_sectors << 9;
bp->bv1.bv_len = first_sectors << 9;
}
bp->bio1.bi_io_vec = &bp->bv1;
bp->bio2.bi_io_vec = &bp->bv2;
bp->bio1.bi_io_vec = &bp->bv1;
bp->bio2.bi_io_vec = &bp->bv2;
bp->bio1.bi_max_vecs = 1;
bp->bio2.bi_max_vecs = 1;
bp->bio1.bi_max_vecs = 1;
bp->bio2.bi_max_vecs = 1;
}
bp->bio1.bi_end_io = bio_pair_end_1;
bp->bio2.bi_end_io = bio_pair_end_2;
......
......@@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev);
int set_blocksize(struct block_device *bdev, int size)
{
struct address_space *mapping;
/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
return -EINVAL;
......@@ -124,6 +126,19 @@ int set_blocksize(struct block_device *bdev, int size)
if (size < bdev_logical_block_size(bdev))
return -EINVAL;
/* Prevent starting I/O or mapping the device */
percpu_down_write(&bdev->bd_block_size_semaphore);
/* Check that the block device is not memory mapped */
mapping = bdev->bd_inode->i_mapping;
mutex_lock(&mapping->i_mmap_mutex);
if (mapping_mapped(mapping)) {
mutex_unlock(&mapping->i_mmap_mutex);
percpu_up_write(&bdev->bd_block_size_semaphore);
return -EBUSY;
}
mutex_unlock(&mapping->i_mmap_mutex);
/* Don't change the size if it is same as current */
if (bdev->bd_block_size != size) {
sync_blockdev(bdev);
......@@ -131,6 +146,9 @@ int set_blocksize(struct block_device *bdev, int size)
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
}
percpu_up_write(&bdev->bd_block_size_semaphore);
return 0;
}
......@@ -441,6 +459,12 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
if (!ei)
return NULL;
if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
kmem_cache_free(bdev_cachep, ei);
return NULL;
}
return &ei->vfs_inode;
}
......@@ -449,6 +473,8 @@ static void bdev_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
kmem_cache_free(bdev_cachep, bdi);
}
......@@ -1567,6 +1593,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return blkdev_ioctl(bdev, mode, cmd, arg);
}
ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
ssize_t ret;
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
percpu_down_read(&bdev->bd_block_size_semaphore);
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
percpu_up_read(&bdev->bd_block_size_semaphore);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_read);
/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
......@@ -1578,12 +1620,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
blk_start_plug(&plug);
percpu_down_read(&bdev->bd_block_size_semaphore);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
......@@ -1592,11 +1638,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
percpu_up_read(&bdev->bd_block_size_semaphore);
blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
int ret;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
percpu_down_read(&bdev->bd_block_size_semaphore);
ret = generic_file_mmap(file, vma);
percpu_up_read(&bdev->bd_block_size_semaphore);
return ret;
}
/*
* Try to release a page associated with block device when the system
* is under memory pressure.
......@@ -1627,9 +1691,9 @@ const struct file_operations def_blk_fops = {
.llseek = block_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.aio_write = blkdev_aio_write,
.mmap = generic_file_mmap,
.mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
......
......@@ -814,8 +814,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
struct bio *bio;
if (per_dev != master_dev) {
bio = bio_kmalloc(GFP_KERNEL,
master_dev->bio->bi_max_vecs);
bio = bio_clone_kmalloc(master_dev->bio,
GFP_KERNEL);
if (unlikely(!bio)) {
ORE_DBGMSG(
"Failed to allocate BIO size=%u\n",
......@@ -824,7 +824,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
goto out;
}
__bio_clone(bio, master_dev->bio);
bio->bi_bdev = NULL;
bio->bi_next = NULL;
per_dev->offset = master_dev->offset;
......
......@@ -212,20 +212,41 @@ extern void bio_pair_release(struct bio_pair *dbio);
extern struct bio_set *bioset_create(unsigned int, unsigned int);
extern void bioset_free(struct bio_set *);
extern struct bio *bio_alloc(gfp_t, unsigned int);
extern struct bio *bio_kmalloc(gfp_t, unsigned int);
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
extern void bio_put(struct bio *);
extern void bio_free(struct bio *, struct bio_set *);
extern void __bio_clone(struct bio *, struct bio *);
extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
extern struct bio_set *fs_bio_set;
static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
}
static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
}
static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
}
static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
{
return bio_clone_bioset(bio, gfp_mask, NULL);
}
extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
extern void __bio_clone(struct bio *, struct bio *);
extern struct bio *bio_clone(struct bio *, gfp_t);
extern void bio_init(struct bio *);
extern void bio_reset(struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
......@@ -304,8 +325,6 @@ struct biovec_slab {
struct kmem_cache *slab;
};
extern struct bio_set *fs_bio_set;
/*
* a small number of entries is fine, not going to be performance critical.
* basically we just need to survive
......@@ -367,9 +386,31 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
/*
* Check whether this bio carries any data or not. A NULL bio is allowed.
*/
static inline int bio_has_data(struct bio *bio)
static inline bool bio_has_data(struct bio *bio)
{
return bio && bio->bi_io_vec != NULL;
if (bio && bio->bi_vcnt)
return true;
return false;
}
static inline bool bio_is_rw(struct bio *bio)
{
if (!bio_has_data(bio))
return false;
if (bio->bi_rw & REQ_WRITE_SAME)
return false;
return true;
}
static inline bool bio_mergeable(struct bio *bio)
{
if (bio->bi_rw & REQ_NOMERGE_FLAGS)
return false;
return true;
}
/*
......@@ -505,9 +546,8 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
#define bio_integrity(bio) (bio->bi_integrity != NULL)
extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
extern void bio_integrity_free(struct bio *, struct bio_set *);
extern void bio_integrity_free(struct bio *);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern int bio_integrity_enabled(struct bio *bio);
extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
......@@ -517,7 +557,7 @@ extern void bio_integrity_endio(struct bio *, int);
extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
extern void bio_integrity_init(void);
......@@ -549,13 +589,13 @@ static inline int bio_integrity_prep(struct bio *bio)
return 0;
}
static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs)
static inline void bio_integrity_free(struct bio *bio)
{
return;
}
static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
gfp_t gfp_mask, struct bio_set *bs)
gfp_t gfp_mask)
{
return 0;
}
......
......@@ -59,12 +59,6 @@ struct bio {
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
atomic_t bi_cnt; /* pin count */
struct bio_vec *bi_io_vec; /* the actual vec list */
bio_end_io_t *bi_end_io;
void *bi_private;
......@@ -80,7 +74,17 @@ struct bio {
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
bio_destructor_t *bi_destructor; /* destructor */
/*
* Everything starting with bi_max_vecs will be preserved by bio_reset()
*/
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
atomic_t bi_cnt; /* pin count */
struct bio_vec *bi_io_vec; /* the actual vec list */
struct bio_set *bi_pool;
/*
* We can inline a number of vecs at the end of the bio, to avoid
......@@ -90,6 +94,8 @@ struct bio {
struct bio_vec bi_inline_vecs[0];
};
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
/*
* bio flags
*/
......@@ -105,6 +111,13 @@ struct bio {
#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */
#define BIO_QUIET 10 /* Make BIO Quiet */
#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
/*
* Flags starting here get preserved by bio_reset() - this includes
* BIO_POOL_IDX()
*/
#define BIO_RESET_BITS 12
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/*
......@@ -134,6 +147,7 @@ enum rq_flag_bits {
__REQ_PRIO, /* boost priority in cfq */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_WRITE_SAME, /* write same block many times */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_FUA, /* forced unit access */
......@@ -172,15 +186,21 @@ enum rq_flag_bits {
#define REQ_META (1 << __REQ_META)
#define REQ_PRIO (1 << __REQ_PRIO)
#define REQ_DISCARD (1 << __REQ_DISCARD)
#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
/* This mask is used for both bio and request merge checking */
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
#define REQ_THROTTLED (1 << __REQ_THROTTLED)
......
......@@ -270,6 +270,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
......@@ -540,8 +541,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_account_rq(rq) \
(((rq)->cmd_flags & REQ_STARTED) && \
((rq)->cmd_type == REQ_TYPE_FS || \
((rq)->cmd_flags & REQ_DISCARD)))
((rq)->cmd_type == REQ_TYPE_FS))
#define blk_pm_request(rq) \
((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
......@@ -595,17 +595,39 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
rl->flags &= ~flag;
}
static inline bool rq_mergeable(struct request *rq)
{
if (rq->cmd_type != REQ_TYPE_FS)
return false;
/*
* mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
* it already be started by driver.
*/
#define RQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD)
#define rq_mergeable(rq) \
(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
(((rq)->cmd_flags & REQ_DISCARD) || \
(rq)->cmd_type == REQ_TYPE_FS))
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
return false;
return true;
}
static inline bool blk_check_merge_flags(unsigned int flags1,
unsigned int flags2)
{
if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD))
return false;
if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE))
return false;
if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
return false;
return true;
}
static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
if (bio_data(a) == bio_data(b))
return true;
return false;
}
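Hedged sketch of how the merge predicates above compose; the real checks live in the block core, and can_merge_bio() is an illustrative name only:

#include <linux/blkdev.h>

static bool can_merge_bio(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	/* don't mix discard, secure discard or write-same with ordinary I/O */
	if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
		return false;

	/* WRITE SAME bios may only merge if they repeat the same buffer */
	if ((rq->cmd_flags & REQ_WRITE_SAME) &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	return true;
}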
/*
* q->prep_rq_fn return values
......@@ -802,6 +824,28 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> 9;
}
static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
unsigned int cmd_flags)
{
if (unlikely(cmd_flags & REQ_DISCARD))
return q->limits.max_discard_sectors;
if (unlikely(cmd_flags & REQ_WRITE_SAME))
return q->limits.max_write_same_sectors;
return q->limits.max_sectors;
}
static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
{
struct request_queue *q = rq->q;
if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
return q->limits.max_hw_sectors;
return blk_queue_get_max_sectors(q, rq->cmd_flags);
}
/*
* Request issue related functions.
*/
......@@ -857,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
......@@ -987,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
......@@ -1164,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
return queue_discard_zeroes_data(bdev_get_queue(bdev));
}
static inline unsigned int bdev_write_same(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (q)
return q->limits.max_write_same_sectors;
return 0;
}
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
......
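A hedged caller-side sketch of the zeroing helpers declared above: prefer WRITE SAME when bdev_write_same() reports support, otherwise fall back to blkdev_issue_zeroout(). zero_range() is a hypothetical wrapper; the in-tree blkdev_issue_zeroout() performs a similar fallback internally.

#include <linux/blkdev.h>
#include <linux/mm.h>		/* ZERO_PAGE() */

static int zero_range(struct block_device *bdev, sector_t sector,
		      sector_t nr_sects)
{
	if (bdev_write_same(bdev))
		return blkdev_issue_write_same(bdev, sector, nr_sects,
					       GFP_KERNEL, ZERO_PAGE(0));

	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL);
}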
......@@ -335,6 +335,7 @@ struct inodes_stat_t {
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
......@@ -415,6 +416,7 @@ struct inodes_stat_t {
#include <linux/migrate_mode.h>
#include <linux/uidgid.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <asm/byteorder.h>
......@@ -724,6 +726,8 @@ struct block_device {
int bd_fsfreeze_count;
/* Mutex for freeze */
struct mutex bd_fsfreeze_mutex;
/* A semaphore that prevents I/O while block size is being changed */
struct percpu_rw_semaphore bd_block_size_semaphore;
};
/*
......@@ -2570,6 +2574,8 @@ extern int generic_segment_checks(const struct iovec *iov,
unsigned long *nr_segs, size_t *count, int access_flags);
/* fs/block_dev.c */
extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
......
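User-space view of the new BLKZEROOUT ioctl, as a hedged sketch: the argument is a pair of u64 values giving start and length in bytes, and the series rejects values that are not 512-byte aligned. Error handling is kept minimal.

#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKZEROOUT */

int main(int argc, char **argv)
{
	uint64_t range[2] = { 0, 1 << 20 };	/* zero the first 1 MiB */
	int fd;

	if (argc < 2)
		return 1;

	fd = open(argv[1], O_WRONLY);
	if (fd < 0 || ioctl(fd, BLKZEROOUT, range) < 0) {
		perror("BLKZEROOUT");
		return 1;
	}
	close(fd);
	return 0;
}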
#ifndef _LINUX_PERCPU_RWSEM_H
#define _LINUX_PERCPU_RWSEM_H
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
struct percpu_rw_semaphore {
unsigned __percpu *counters;
bool locked;
struct mutex mtx;
};
static inline void percpu_down_read(struct percpu_rw_semaphore *p)
{
rcu_read_lock();
if (unlikely(p->locked)) {
rcu_read_unlock();
mutex_lock(&p->mtx);
this_cpu_inc(*p->counters);
mutex_unlock(&p->mtx);
return;
}
this_cpu_inc(*p->counters);
rcu_read_unlock();
}
static inline void percpu_up_read(struct percpu_rw_semaphore *p)
{
/*
* On x86 the write in this_cpu_dec() acts as a release (unlock) barrier:
* later accesses may be reordered before the write, but accesses that
* precede it cannot be reordered past it.  Other architectures give no
* such guarantee, so an explicit smp_mb() is needed there.
*/
#if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE))
barrier();
#else
smp_mb();
#endif
this_cpu_dec(*p->counters);
}
static inline unsigned __percpu_count(unsigned __percpu *counters)
{
unsigned total = 0;
int cpu;
for_each_possible_cpu(cpu)
total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
return total;
}
static inline void percpu_down_write(struct percpu_rw_semaphore *p)
{
mutex_lock(&p->mtx);
p->locked = true;
synchronize_rcu();
while (__percpu_count(p->counters))
msleep(1);
smp_rmb(); /* paired with smp_mb() in percpu_up_read() */
}
static inline void percpu_up_write(struct percpu_rw_semaphore *p)
{
p->locked = false;
mutex_unlock(&p->mtx);
}
static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
{
p->counters = alloc_percpu(unsigned);
if (unlikely(!p->counters))
return -ENOMEM;
p->locked = false;
mutex_init(&p->mtx);
return 0;
}
static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
{
free_percpu(p->counters);
p->counters = NULL; /* catch use after free bugs */
}
#endif
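A hedged usage sketch of the primitive above, in the shape it is put to for block devices: readers bracket each I/O with percpu_down_read()/percpu_up_read(), which in the common case only bump a per-CPU counter, while the rare writer takes percpu_down_write() and waits for in-flight readers to drain. The function names below are illustrative only.

#include <linux/fs.h>
#include <linux/percpu-rwsem.h>

/* reader side: cheap per-CPU increment in the common case */
static void io_with_stable_block_size(struct block_device *bdev)
{
	percpu_down_read(&bdev->bd_block_size_semaphore);
	/* submit the read/write here; bd_block_size cannot change meanwhile */
	percpu_up_read(&bdev->bd_block_size_semaphore);
}

/* writer side: rare and slow, drains every in-flight reader first */
static void change_block_size(struct block_device *bdev, unsigned int size)
{
	percpu_down_write(&bdev->bd_block_size_semaphore);
	bdev->bd_block_size = size;
	percpu_up_write(&bdev->bd_block_size_semaphore);
}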
......@@ -201,6 +201,7 @@ static inline void *sg_virt(struct scatterlist *sg)
return page_address(sg_page(sg)) + sg->offset;
}
int sg_nents(struct scatterlist *sg);
struct scatterlist *sg_next(struct scatterlist *);
struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
void sg_init_table(struct scatterlist *, unsigned int);
......
......@@ -38,6 +38,25 @@ struct scatterlist *sg_next(struct scatterlist *sg)
}
EXPORT_SYMBOL(sg_next);
/**
* sg_nents - return total count of entries in scatterlist
* @sg: The scatterlist
*
* Description:
* Returns the total number of entries in sg, taking chaining into
* account.
*
**/
int sg_nents(struct scatterlist *sg)
{
int nents;
for (nents = 0; sg; sg = sg_next(sg))
nents++;
return nents;
}
EXPORT_SYMBOL(sg_nents);
/**
* sg_last - return the last scatterlist entry in a list
* @sgl: First entry in the scatterlist
......
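A hedged sketch of the new sg_nents() helper used with a chained table (this assumes the architecture supports scatterlist chaining); the table sizes are arbitrary:

#include <linux/scatterlist.h>

static int count_chained_entries(void)
{
	struct scatterlist first[4], second[3];

	sg_init_table(first, 4);
	sg_init_table(second, 3);

	/* the last slot of 'first' becomes the chain link and is not counted */
	sg_chain(first, 4, second);

	return sg_nents(first);		/* 3 data entries + 3 = 6 */
}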