提交 7a6d04e7 编写于 作者: P Peter Maydell

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block patches for 2.1.0-rc2 (v2)

# gpg: Signature made Mon 14 Jul 2014 11:04:12 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (22 commits)
  ide: Treat read/write beyond end as invalid
  virtio-blk: Treat read/write beyond end as invalid
  virtio-blk: Bypass error action and I/O accounting on invalid r/w
  virtio-blk: Factor common checks out of virtio_blk_handle_read/write()
  dma-helpers: Fix too long qiov
  qtest: fix vhost-user-test compilation with old GLib
  tests: Fix unterminated string output visitor enum human string
  AioContext: do not rely on aio_poll(ctx, true) result to end a loop
  virtio-blk: embed VirtQueueElement in VirtIOBlockReq
  virtio-blk: avoid g_slice_new0() for VirtIOBlockReq and VirtQueueElement
  dataplane: do not free VirtQueueElement in vring_push()
  virtio-blk: avoid dataplane VirtIOBlockReq early free
  block: Assert qiov length matches request length
  qed: Make qiov match request size until backing file EOF
  qcow2: Make qiov match request size until backing file EOF
  block: Make qiov match the request size until EOF
  AioContext: speed up aio_notify
  test-aio: fix GSource-based timer test
  block: drop aio functions that operate on the main AioContext
  block: prefer aio_poll to qemu_aio_wait
  ...
Signed-off-by: NPeter Maydell <peter.maydell@linaro.org>
......@@ -125,7 +125,7 @@ static bool aio_dispatch(AioContext *ctx)
bool progress = false;
/*
* We have to walk very carefully in case qemu_aio_set_fd_handler is
* We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking.
*/
node = QLIST_FIRST(&ctx->aio_handlers);
......@@ -175,27 +175,56 @@ static bool aio_dispatch(AioContext *ctx)
bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandler *node;
bool was_dispatching;
int ret;
bool progress;
was_dispatching = ctx->dispatching;
progress = false;
/* aio_notify can avoid the expensive event_notifier_set if
* everything (file descriptors, bottom halves, timers) will
* be re-evaluated before the next blocking poll(). This happens
* in two cases:
*
* 1) when aio_poll is called with blocking == false
*
* 2) when we are called after poll(). If we are called before
* poll(), bottom halves will not be re-evaluated and we need
* aio_notify() if blocking == true.
*
* The first aio_dispatch() only does something when AioContext is
* running as a GSource, and in that case aio_poll is used only
* with blocking == false, so this optimization is already quite
* effective. However, the code is ugly and should be restructured
* to have a single aio_dispatch() call. To do this, we need to
* reorganize aio_poll into a prepare/poll/dispatch model like
* glib's.
*
* If we're in a nested event loop, ctx->dispatching might be true.
* In that case we can restore it just before returning, but we
* have to clear it now.
*/
aio_set_dispatching(ctx, !blocking);
/*
* If there are callbacks left that have been queued, we need to call them.
* Do not call select in this case, because it is possible that the caller
* does not need a complete flush (as is the case for qemu_aio_wait loops).
* does not need a complete flush (as is the case for aio_poll loops).
*/
if (aio_bh_poll(ctx)) {
blocking = false;
progress = true;
}
/* Re-evaluate condition (1) above. */
aio_set_dispatching(ctx, !blocking);
if (aio_dispatch(ctx)) {
progress = true;
}
if (progress && !blocking) {
return true;
goto out;
}
ctx->walking_handlers++;
......@@ -234,9 +263,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
}
/* Run dispatch even if there were no readable fds to run timers */
aio_set_dispatching(ctx, true);
if (aio_dispatch(ctx)) {
progress = true;
}
out:
aio_set_dispatching(ctx, was_dispatching);
return progress;
}
......@@ -102,7 +102,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
/*
* If there are callbacks left that have been queued, we need to call then.
* Do not call select in this case, because it is possible that the caller
* does not need a complete flush (as is the case for qemu_aio_wait loops).
* does not need a complete flush (as is the case for aio_poll loops).
*/
if (aio_bh_poll(ctx)) {
blocking = false;
......@@ -115,7 +115,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
/*
* Then dispatch any pending callbacks from the GSource.
*
* We have to walk very carefully in case qemu_aio_set_fd_handler is
* We have to walk very carefully in case aio_set_fd_handler is
* called while we're walking.
*/
node = QLIST_FIRST(&ctx->aio_handlers);
......@@ -177,7 +177,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
blocking = false;
/* we have to walk very carefully in case
* qemu_aio_set_fd_handler is called while we're walking */
* aio_set_fd_handler is called while we're walking */
node = QLIST_FIRST(&ctx->aio_handlers);
while (node) {
AioHandler *tmp;
......
......@@ -26,6 +26,7 @@
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
......@@ -247,9 +248,25 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx)
return ctx->thread_pool;
}
void aio_set_dispatching(AioContext *ctx, bool dispatching)
{
ctx->dispatching = dispatching;
if (!dispatching) {
/* Write ctx->dispatching before reading e.g. bh->scheduled.
* Optimization: this is only needed when we're entering the "unsafe"
* phase where other threads must call event_notifier_set.
*/
smp_mb();
}
}
void aio_notify(AioContext *ctx)
{
event_notifier_set(&ctx->notifier);
/* Write e.g. bh->scheduled before reading ctx->dispatching. */
smp_mb();
if (!ctx->dispatching) {
event_notifier_set(&ctx->notifier);
}
}
static void aio_timerlist_notify(void *opaque)
......
......@@ -471,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
co = qemu_coroutine_create(bdrv_create_co_entry);
qemu_coroutine_enter(co, &cco);
while (cco.ret == NOT_DONE) {
qemu_aio_wait();
aio_poll(qemu_get_aio_context(), true);
}
}
......@@ -3010,6 +3010,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
......@@ -3054,8 +3055,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) {
ret = drv->bdrv_co_readv(bs, sector_num,
MIN(nb_sectors, max_nb_sectors), qiov);
QEMUIOVector local_qiov;
size_t local_sectors;
max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
local_sectors = MIN(max_nb_sectors, nb_sectors);
qemu_iovec_init(&local_qiov, qiov->niov);
qemu_iovec_concat(&local_qiov, qiov, 0,
local_sectors * BDRV_SECTOR_SIZE);
ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
&local_qiov);
qemu_iovec_destroy(&local_qiov);
} else {
ret = 0;
}
......@@ -3267,6 +3280,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
......@@ -4040,7 +4054,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
if (ret < 0) {
return ret;
}
return (ret & BDRV_BLOCK_ALLOCATED);
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
/*
......
......@@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque)
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
if (alloced == 1) {
if (alloced == 1 || n == 0) {
break;
}
}
......
......@@ -1020,11 +1020,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
sector_num, cur_nr_sectors);
if (n1 > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, hd_qiov.niov);
qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
n1 * BDRV_SECTOR_SIZE);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->backing_hd, sector_num,
n1, &hd_qiov);
n1, &local_qiov);
qemu_co_mutex_lock(&s->lock);
qemu_iovec_destroy(&local_qiov);
if (ret < 0) {
goto fail;
}
......
......@@ -761,17 +761,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
/**
* Read from the backing file or zero-fill if no backing file
*
* @s: QED state
* @pos: Byte position in device
* @qiov: Destination I/O vector
* @cb: Completion function
* @opaque: User data for completion function
* @s: QED state
* @pos: Byte position in device
* @qiov: Destination I/O vector
* @backing_qiov: Possibly shortened copy of qiov, to be allocated here
* @cb: Completion function
* @opaque: User data for completion function
*
* This function reads qiov->size bytes starting at pos from the backing file.
* If there is no backing file then zeroes are read.
*/
static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
QEMUIOVector *qiov,
QEMUIOVector **backing_qiov,
BlockDriverCompletionFunc *cb, void *opaque)
{
uint64_t backing_length = 0;
......@@ -804,15 +806,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
/* If the read straddles the end of the backing file, shorten it */
size = MIN((uint64_t)backing_length - pos, qiov->size);
assert(*backing_qiov == NULL);
*backing_qiov = g_new(QEMUIOVector, 1);
qemu_iovec_init(*backing_qiov, qiov->niov);
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
typedef struct {
GenericCB gencb;
BDRVQEDState *s;
QEMUIOVector qiov;
QEMUIOVector *backing_qiov;
struct iovec iov;
uint64_t offset;
} CopyFromBackingFileCB;
......@@ -829,6 +837,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
CopyFromBackingFileCB *copy_cb = opaque;
BDRVQEDState *s = copy_cb->s;
if (copy_cb->backing_qiov) {
qemu_iovec_destroy(copy_cb->backing_qiov);
g_free(copy_cb->backing_qiov);
copy_cb->backing_qiov = NULL;
}
if (ret) {
qed_copy_from_backing_file_cb(copy_cb, ret);
return;
......@@ -866,11 +880,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
copy_cb->s = s;
copy_cb->offset = offset;
copy_cb->backing_qiov = NULL;
copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
copy_cb->iov.iov_len = len;
qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
qed_read_backing_file(s, pos, &copy_cb->qiov,
qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
qed_copy_from_backing_file_write, copy_cb);
}
......@@ -1313,7 +1328,7 @@ static void qed_aio_read_data(void *opaque, int ret,
return;
} else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
qed_aio_next_io, acb);
&acb->backing_qiov, qed_aio_next_io, acb);
return;
}
......@@ -1339,6 +1354,12 @@ static void qed_aio_next_io(void *opaque, int ret)
trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
if (acb->backing_qiov) {
qemu_iovec_destroy(acb->backing_qiov);
g_free(acb->backing_qiov);
acb->backing_qiov = NULL;
}
/* Handle I/O error */
if (ret) {
qed_aio_complete(acb, ret);
......@@ -1378,6 +1399,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
acb->qiov_offset = 0;
acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
acb->backing_qiov = NULL;
acb->request.l2_table = NULL;
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
......
......@@ -142,6 +142,7 @@ typedef struct QEDAIOCB {
/* Current cluster scatter-gather list */
QEMUIOVector cur_qiov;
QEMUIOVector *backing_qiov;
uint64_t cur_pos; /* position on block device, in bytes */
uint64_t cur_cluster; /* cluster offset in image file */
unsigned int cur_nclusters; /* number of clusters being accessed */
......
......@@ -790,6 +790,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
p += aiocb->aio_iov[i].iov_len;
}
assert(p - buf == aiocb->aio_nbytes);
}
nbytes = handle_aiocb_rw_linear(aiocb, buf);
......@@ -804,9 +805,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
copy = aiocb->aio_iov[i].iov_len;
}
memcpy(aiocb->aio_iov[i].iov_base, p, copy);
assert(count >= copy);
p += copy;
count -= copy;
}
assert(count == 0);
}
qemu_vfree(buf);
......@@ -993,12 +996,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
assert(qiov->size == acb->aio_nbytes);
}
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
trace_paio_submit_co(sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
......@@ -1016,12 +1021,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
assert(qiov->size == acb->aio_nbytes);
}
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
......
......@@ -187,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job)
job->opaque = &data;
block_job_cancel(job);
while (data.ret == -EINPROGRESS) {
qemu_aio_wait();
aio_poll(bdrv_get_aio_context(bs), true);
}
return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
}
......
......@@ -170,6 +170,10 @@ static void dma_bdrv_cb(void *opaque, int ret)
return;
}
if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
}
dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
dbs->iov.size / 512, dma_bdrv_cb, dbs);
assert(dbs->acb);
......
/*
* This model describes the interaction between aio_set_dispatching()
* and aio_notify().
*
* Author: Paolo Bonzini <pbonzini@redhat.com>
*
* This file is in the public domain. If you really want a license,
* the WTFPL will do.
*
* To simulate it:
* spin -p docs/aio_notify.promela
*
* To verify it:
* spin -a docs/aio_notify.promela
* gcc -O2 pan.c
* ./a.out -a
*/
#define MAX 4
#define LAST (1 << (MAX - 1))
#define FINAL ((LAST << 1) - 1)
bool dispatching;
bool event;
int req, done;
active proctype waiter()
{
int fetch, blocking;
do
:: done != FINAL -> {
// Computing "blocking" is separate from execution of the
// "bottom half"
blocking = (req == 0);
// This is our "bottom half"
atomic { fetch = req; req = 0; }
done = done | fetch;
// Wait for a nudge from the other side
do
:: event == 1 -> { event = 0; break; }
:: !blocking -> break;
od;
dispatching = 1;
// If you are simulating this model, you may want to add
// something like this here:
//
// int foo; foo++; foo++; foo++;
//
// This only wastes some time and makes it more likely
// that the notifier process hits the "fast path".
dispatching = 0;
}
:: else -> break;
od
}
active proctype notifier()
{
int next = 1;
int sets = 0;
do
:: next <= LAST -> {
// generate a request
req = req | next;
next = next << 1;
// aio_notify
if
:: dispatching == 0 -> sets++; event = 1;
:: else -> skip;
fi;
// Test both synchronous and asynchronous delivery
if
:: 1 -> do
:: req == 0 -> break;
od;
:: 1 -> skip;
fi;
}
:: else -> break;
od;
printf("Skipped %d event_notifier_set\n", MAX - sets);
}
#define p (done == FINAL)
never {
do
:: 1 // after an arbitrarily long prefix
:: p -> break // p becomes true
od;
do
:: !p -> accept: break // it then must remains true forever after
od
}
......@@ -65,43 +65,41 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
{
stb_p(&req->in->status, status);
vring_push(&req->dev->dataplane->vring, req->elem,
vring_push(&req->dev->dataplane->vring, &req->elem,
req->qiov.size + sizeof(*req->in));
notify_guest(req->dev->dataplane);
g_slice_free(VirtIOBlockReq, req);
}
static void handle_notify(EventNotifier *e)
{
VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
host_notifier);
VirtQueueElement *elem;
VirtIOBlockReq *req;
int ret;
MultiReqBuffer mrb = {
.num_writes = 0,
};
VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
event_notifier_test_and_clear(&s->host_notifier);
bdrv_io_plug(s->blk->conf.bs);
for (;;) {
MultiReqBuffer mrb = {
.num_writes = 0,
};
int ret;
/* Disable guest->host notifies to avoid unnecessary vmexits */
vring_disable_notification(s->vdev, &s->vring);
for (;;) {
ret = vring_pop(s->vdev, &s->vring, &elem);
VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
ret = vring_pop(s->vdev, &s->vring, &req->elem);
if (ret < 0) {
assert(elem == NULL);
virtio_blk_free_request(req);
break; /* no more requests */
}
trace_virtio_blk_data_plane_process_request(s, elem->out_num,
elem->in_num, elem->index);
trace_virtio_blk_data_plane_process_request(s, req->elem.out_num,
req->elem.in_num,
req->elem.index);
req = g_slice_new(VirtIOBlockReq);
req->dev = VIRTIO_BLK(s->vdev);
req->elem = elem;
virtio_blk_handle_request(req, &mrb);
}
......
......@@ -29,18 +29,18 @@
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq);
VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
req->dev = s;
req->elem = g_slice_new0(VirtQueueElement);
req->qiov.size = 0;
req->next = NULL;
return req;
}
static void virtio_blk_free_request(VirtIOBlockReq *req)
void virtio_blk_free_request(VirtIOBlockReq *req)
{
if (req) {
g_slice_free(VirtQueueElement, req->elem);
g_slice_free(VirtIOBlockReq, req);
}
}
......@@ -54,7 +54,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status);
virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in));
virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
virtio_notify(vdev, s->vq);
}
......@@ -119,7 +119,7 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
VirtIOBlockReq *req = virtio_blk_alloc_request(s);
if (!virtqueue_pop(s->vq, req->elem)) {
if (!virtqueue_pop(s->vq, &req->elem)) {
virtio_blk_free_request(req);
return NULL;
}
......@@ -252,7 +252,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
int status;
status = virtio_blk_handle_scsi_req(req->dev, req->elem);
status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
virtio_blk_req_complete(req, status);
virtio_blk_free_request(req);
}
......@@ -288,6 +288,25 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
}
static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
uint64_t sector, size_t size)
{
uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
uint64_t total_sectors;
if (sector & dev->sector_mask) {
return false;
}
if (size % dev->conf->logical_block_size) {
return false;
}
bdrv_get_geometry(dev->bs, &total_sectors);
if (sector > total_sectors || nb_sectors > total_sectors - sector) {
return false;
}
return true;
}
static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
BlockRequest *blkreq;
......@@ -295,19 +314,16 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
if (sector & req->dev->sector_mask) {
virtio_blk_rw_complete(req, -EIO);
return;
}
if (req->qiov.size % req->dev->conf->logical_block_size) {
virtio_blk_rw_complete(req, -EIO);
if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
virtio_blk_free_request(req);
return;
}
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
if (mrb->num_writes == 32) {
virtio_submit_multiwrite(req->dev->bs, mrb);
}
......@@ -329,18 +345,15 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
if (sector & req->dev->sector_mask) {
virtio_blk_rw_complete(req, -EIO);
return;
}
if (req->qiov.size % req->dev->conf->logical_block_size) {
virtio_blk_rw_complete(req, -EIO);
if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
virtio_blk_free_request(req);
return;
}
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
req->qiov.size / BDRV_SECTOR_SIZE,
virtio_blk_rw_complete, req);
......@@ -349,12 +362,12 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
struct iovec *in_iov = req->elem->in_sg;
struct iovec *iov = req->elem->out_sg;
unsigned in_num = req->elem->in_num;
unsigned out_num = req->elem->out_num;
struct iovec *in_iov = req->elem.in_sg;
struct iovec *iov = req->elem.out_sg;
unsigned in_num = req->elem.in_num;
unsigned out_num = req->elem.out_num;
if (req->elem->out_num < 1 || req->elem->in_num < 1) {
if (req->elem.out_num < 1 || req->elem.in_num < 1) {
error_report("virtio-blk missing headers");
exit(1);
}
......@@ -391,19 +404,19 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
* NB: per existing s/n string convention the string is
* terminated by '\0' only when shorter than buffer.
*/
strncpy(req->elem->in_sg[0].iov_base,
strncpy(req->elem.in_sg[0].iov_base,
s->blk.serial ? s->blk.serial : "",
MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
virtio_blk_free_request(req);
} else if (type & VIRTIO_BLK_T_OUT) {
qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1],
req->elem->out_num - 1);
qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
req->elem.out_num - 1);
virtio_blk_handle_write(req, mrb);
} else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
/* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0],
req->elem->in_num - 1);
qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
req->elem.in_num - 1);
virtio_blk_handle_read(req);
} else {
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
......@@ -627,7 +640,7 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
while (req) {
qemu_put_sbyte(f, 1);
qemu_put_buffer(f, (unsigned char *)req->elem,
qemu_put_buffer(f, (unsigned char *)&req->elem,
sizeof(VirtQueueElement));
req = req->next;
}
......@@ -652,15 +665,15 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
while (qemu_get_sbyte(f)) {
VirtIOBlockReq *req = virtio_blk_alloc_request(s);
qemu_get_buffer(f, (unsigned char *)req->elem,
qemu_get_buffer(f, (unsigned char *)&req->elem,
sizeof(VirtQueueElement));
req->next = s->rq;
s->rq = req;
virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr,
req->elem->in_num, 1);
virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr,
req->elem->out_num, 0);
virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
req->elem.in_num, 1);
virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
req->elem.out_num, 0);
}
return 0;
......
......@@ -499,6 +499,18 @@ static void ide_rw_error(IDEState *s) {
ide_set_irq(s->bus);
}
static bool ide_sect_range_ok(IDEState *s,
uint64_t sector, uint64_t nb_sectors)
{
uint64_t total_sectors;
bdrv_get_geometry(s->bs, &total_sectors);
if (sector > total_sectors || nb_sectors > total_sectors - sector) {
return false;
}
return true;
}
static void ide_sector_read_cb(void *opaque, int ret)
{
IDEState *s = opaque;
......@@ -554,6 +566,11 @@ void ide_sector_read(IDEState *s)
printf("sector=%" PRId64 "\n", sector_num);
#endif
if (!ide_sect_range_ok(s, sector_num, n)) {
ide_rw_error(s);
return;
}
s->iov.iov_base = s->io_buffer;
s->iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&s->qiov, &s->iov, 1);
......@@ -671,6 +688,12 @@ void ide_dma_cb(void *opaque, int ret)
sector_num, n, s->dma_cmd);
#endif
if (!ide_sect_range_ok(s, sector_num, n)) {
dma_buf_commit(s);
ide_dma_error(s);
return;
}
switch (s->dma_cmd) {
case IDE_DMA_READ:
s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
......@@ -790,6 +813,11 @@ void ide_sector_write(IDEState *s)
n = s->req_nb_sectors;
}
if (!ide_sect_range_ok(s, sector_num, n)) {
ide_rw_error(s);
return;
}
s->iov.iov_base = s->io_buffer;
s->iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&s->qiov, &s->iov, 1);
......
......@@ -272,7 +272,7 @@ static int get_indirect(Vring *vring, VirtQueueElement *elem,
return 0;
}
void vring_free_element(VirtQueueElement *elem)
static void vring_unmap_element(VirtQueueElement *elem)
{
int i;
......@@ -287,8 +287,6 @@ void vring_free_element(VirtQueueElement *elem)
for (i = 0; i < elem->in_num; i++) {
vring_unmap(elem->in_sg[i].iov_base, true);
}
g_slice_free(VirtQueueElement, elem);
}
/* This looks in the virtqueue and for the first available buffer, and converts
......@@ -303,14 +301,16 @@ void vring_free_element(VirtQueueElement *elem)
* Stolen from linux/drivers/vhost/vhost.c.
*/
int vring_pop(VirtIODevice *vdev, Vring *vring,
VirtQueueElement **p_elem)
VirtQueueElement *elem)
{
struct vring_desc desc;
unsigned int i, head, found = 0, num = vring->vr.num;
uint16_t avail_idx, last_avail_idx;
VirtQueueElement *elem = NULL;
int ret;
/* Initialize elem so it can be safely unmapped */
elem->in_num = elem->out_num = 0;
/* If there was a fatal error then refuse operation */
if (vring->broken) {
ret = -EFAULT;
......@@ -342,10 +342,8 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
* the index we've seen. */
head = vring->vr.avail->ring[last_avail_idx % num];
elem = g_slice_new(VirtQueueElement);
elem->index = head;
elem->in_num = elem->out_num = 0;
/* If their number is silly, that's an error. */
if (unlikely(head >= num)) {
error_report("Guest says index %u > %u is available", head, num);
......@@ -393,7 +391,6 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
/* On success, increment avail index. */
vring->last_avail_idx++;
*p_elem = elem;
return head;
out:
......@@ -401,10 +398,7 @@ out:
if (ret == -EFAULT) {
vring->broken = true;
}
if (elem) {
vring_free_element(elem);
}
*p_elem = NULL;
vring_unmap_element(elem);
return ret;
}
......@@ -418,7 +412,7 @@ void vring_push(Vring *vring, VirtQueueElement *elem, int len)
unsigned int head = elem->index;
uint16_t new;
vring_free_element(elem);
vring_unmap_element(elem);
/* Don't touch vring if a fatal error occurred */
if (vring->broken) {
......
......@@ -60,8 +60,14 @@ struct AioContext {
*/
int walking_handlers;
/* Used to avoid unnecessary event_notifier_set calls in aio_notify.
* Writes protected by lock or BQL, reads are lockless.
*/
bool dispatching;
/* lock to protect between bh's adders and deleter */
QemuMutex bh_lock;
/* Anchor of the list of Bottom Halves belonging to the context */
struct QEMUBH *first_bh;
......@@ -83,6 +89,9 @@ struct AioContext {
QEMUTimerListGroup tlg;
};
/* Used internally to synchronize aio_poll against qemu_bh_schedule. */
void aio_set_dispatching(AioContext *ctx, bool dispatching);
/**
* aio_context_new: Allocate a new AioContext.
*
......@@ -205,9 +214,9 @@ bool aio_pending(AioContext *ctx);
/* Progress in completing AIO work to occur. This can issue new pending
* aio as a result of executing I/O completion or bh callbacks.
*
* If there is no pending AIO operation or completion (bottom half),
* return false. If there are pending AIO operations of bottom halves,
* return true.
* Return whether any progress was made by executing AIO or bottom half
* handlers. If @blocking == true, this should always be true except
* if someone called aio_notify.
*
* If there are no pending bottom halves, but there are pending AIO
* operations, it may not be possible to make any progress without
......@@ -220,7 +229,7 @@ bool aio_poll(AioContext *ctx, bool blocking);
#ifdef CONFIG_POSIX
/* Register a file descriptor and associated callbacks. Behaves very similarly
* to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
* be invoked when using qemu_aio_wait().
* be invoked when using aio_poll().
*
* Code that invokes AIO completion functions should rely on this function
* instead of qemu_set_fd_handler[2].
......@@ -234,7 +243,7 @@ void aio_set_fd_handler(AioContext *ctx,
/* Register an event notifier and associated callbacks. Behaves very similarly
* to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
* will be invoked when using qemu_aio_wait().
* will be invoked when using aio_poll().
*
* Code that invokes AIO completion functions should rely on this function
* instead of event_notifier_set_handler.
......@@ -251,19 +260,6 @@ GSource *aio_get_g_source(AioContext *ctx);
/* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
/* Functions to operate on the main QEMU AioContext. */
bool qemu_aio_wait(void);
void qemu_aio_set_event_notifier(EventNotifier *notifier,
EventNotifierHandler *io_read);
#ifdef CONFIG_POSIX
void qemu_aio_set_fd_handler(int fd,
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
#endif
/**
* aio_timer_new:
* @ctx: the aio context
......
......@@ -74,7 +74,7 @@ struct BlockJob {
* Set to true if the job should cancel itself. The flag must
* always be tested just before toggling the busy flag from false
* to true. After a job has been cancelled, it should only yield
* if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
* if #aio_poll will ("sooner or later") reenter the coroutine.
*/
bool cancelled;
......@@ -87,7 +87,7 @@ struct BlockJob {
/**
* Set to false by the job while it is in a quiescent state, where
* no I/O is pending and the job has yielded on any condition
* that is not detected by #qemu_aio_wait, such as a timer.
* that is not detected by #aio_poll, such as a timer.
*/
bool busy;
......
......@@ -212,7 +212,7 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns);
* Yield the coroutine for a given duration
*
* Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
* resumed when using qemu_aio_wait().
* resumed when using aio_poll().
*/
void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
int64_t ns);
......
......@@ -53,8 +53,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem);
int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem);
void vring_push(Vring *vring, VirtQueueElement *elem, int len);
void vring_free_element(VirtQueueElement *elem);
#endif /* VRING_H */
......@@ -144,7 +144,7 @@ typedef struct MultiReqBuffer {
typedef struct VirtIOBlockReq {
VirtIOBlock *dev;
VirtQueueElement *elem;
VirtQueueElement elem;
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out;
QEMUIOVector qiov;
......@@ -152,6 +152,10 @@ typedef struct VirtIOBlockReq {
BlockAcctCookie acct;
} VirtIOBlockReq;
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
void virtio_blk_free_request(VirtIOBlockReq *req);
int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
VirtQueueElement *elem);
......
......@@ -329,6 +329,7 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int fillc, size_t bytes);
ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes);
bool buffer_is_zero(const void *buf, size_t len);
......
......@@ -30,6 +30,7 @@ typedef ObjectClass IOThreadClass;
static void *iothread_run(void *opaque)
{
IOThread *iothread = opaque;
bool blocking;
qemu_mutex_lock(&iothread->init_done_lock);
iothread->thread_id = qemu_get_thread_id();
......@@ -38,8 +39,10 @@ static void *iothread_run(void *opaque)
while (!iothread->stopping) {
aio_context_acquire(iothread->ctx);
while (!iothread->stopping && aio_poll(iothread->ctx, true)) {
blocking = true;
while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
/* Progress was made, keep going */
blocking = false;
}
aio_context_release(iothread->ctx);
}
......
......@@ -498,24 +498,3 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
{
return aio_bh_new(qemu_aio_context, cb, opaque);
}
bool qemu_aio_wait(void)
{
return aio_poll(qemu_aio_context, true);
}
#ifdef CONFIG_POSIX
void qemu_aio_set_fd_handler(int fd,
IOHandler *io_read,
IOHandler *io_write,
void *opaque)
{
aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, opaque);
}
#endif
void qemu_aio_set_event_notifier(EventNotifier *notifier,
EventNotifierHandler *io_read)
{
aio_set_event_notifier(qemu_aio_context, notifier, io_read);
}
......@@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count,
co = qemu_coroutine_create(co_write_zeroes_entry);
qemu_coroutine_enter(co, &data);
while (!data.done) {
qemu_aio_wait();
aio_poll(bdrv_get_aio_context(bs), true);
}
if (data.ret < 0) {
return data.ret;
......@@ -2027,7 +2027,7 @@ static const cmdinfo_t resume_cmd = {
static int wait_break_f(BlockDriverState *bs, int argc, char **argv)
{
while (!bdrv_debug_is_suspended(bs, argv[1])) {
qemu_aio_wait();
aio_poll(bdrv_get_aio_context(bs), true);
}
return 0;
......
......@@ -33,7 +33,8 @@ status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
rm -f "${TEST_IMG}.copy"
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
......@@ -41,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
. ./common.rc
. ./common.filter
. ./common.pattern
. ./common.qemu
# Any format supporting backing files except vmdk and qcow which do not support
# smaller backing files.
......@@ -99,6 +101,29 @@ _check_test_img
# Rebase it on top of its base image
$QEMU_IMG rebase -b "$TEST_IMG.base" "$TEST_IMG"
echo
echo block-backup
echo
qemu_comm_method="monitor"
_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
h=$QEMU_HANDLE
QEMU_COMM_TIMEOUT=1
_send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)"
qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs"
_send_qemu_cmd $h 'quit' ""
# Base image sectors
TEST_IMG="${TEST_IMG}.copy" io readv $(( offset )) 512 1024 32
# Image sectors
TEST_IMG="${TEST_IMG}.copy" io readv $(( offset + 512 )) 512 1024 64
# Zero sectors beyond end of base image
TEST_IMG="${TEST_IMG}.copy" io_zero readv $(( offset + 32 * 1024 )) 512 1024 32
_check_test_img
# success, all done
......
此差异已折叠。
......@@ -24,14 +24,6 @@ typedef struct {
bool auto_set;
} EventNotifierTestData;
/* Wait until there are no more BHs or AIO requests */
static void wait_for_aio(void)
{
while (aio_poll(ctx, true)) {
/* Do nothing */
}
}
/* Wait until event notifier becomes inactive */
static void wait_until_inactive(EventNotifierTestData *data)
{
......@@ -204,7 +196,9 @@ static void test_bh_schedule10(void)
g_assert(aio_poll(ctx, true));
g_assert_cmpint(data.n, ==, 2);
wait_for_aio();
while (data.n < 10) {
aio_poll(ctx, true);
}
g_assert_cmpint(data.n, ==, 10);
g_assert(!aio_poll(ctx, false));
......@@ -252,7 +246,9 @@ static void test_bh_delete_from_cb(void)
qemu_bh_schedule(data1.bh);
g_assert_cmpint(data1.n, ==, 0);
wait_for_aio();
while (data1.n < data1.max) {
aio_poll(ctx, true);
}
g_assert_cmpint(data1.n, ==, data1.max);
g_assert(data1.bh == NULL);
......@@ -287,7 +283,12 @@ static void test_bh_delete_from_cb_many(void)
g_assert_cmpint(data4.n, ==, 1);
g_assert(data1.bh == NULL);
wait_for_aio();
while (data1.n < data1.max ||
data2.n < data2.max ||
data3.n < data3.max ||
data4.n < data4.max) {
aio_poll(ctx, true);
}
g_assert_cmpint(data1.n, ==, data1.max);
g_assert_cmpint(data2.n, ==, data2.max);
g_assert_cmpint(data3.n, ==, data3.max);
......@@ -306,7 +307,7 @@ static void test_bh_flush(void)
qemu_bh_schedule(data.bh);
g_assert_cmpint(data.n, ==, 0);
wait_for_aio();
g_assert(aio_poll(ctx, true));
g_assert_cmpint(data.n, ==, 1);
g_assert(!aio_poll(ctx, false));
......@@ -806,17 +807,16 @@ static void test_source_timer_schedule(void)
g_usleep(1 * G_USEC_PER_SEC);
g_assert_cmpint(data.n, ==, 0);
g_assert(g_main_context_iteration(NULL, false));
g_assert(g_main_context_iteration(NULL, true));
g_assert_cmpint(data.n, ==, 1);
expiry += data.ns;
/* The comment above was not kidding when it said this wakes up itself */
do {
g_assert(g_main_context_iteration(NULL, true));
} while (qemu_clock_get_ns(data.clock_type) <= expiry);
g_usleep(1 * G_USEC_PER_SEC);
g_main_context_iteration(NULL, false);
while (data.n < 2) {
g_main_context_iteration(NULL, true);
}
g_assert_cmpint(data.n, ==, 2);
g_assert(qemu_clock_get_ns(data.clock_type) > expiry);
aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL);
close(pipefd[0]);
......
......@@ -83,7 +83,7 @@ static void co_test_cb(void *opaque)
data->ret = 0;
active--;
/* The test continues in test_submit_co, after qemu_aio_wait_all... */
/* The test continues in test_submit_co, after aio_poll... */
}
static void test_submit_co(void)
......@@ -98,7 +98,7 @@ static void test_submit_co(void)
g_assert_cmpint(active, ==, 1);
g_assert_cmpint(data.ret, ==, -EINPROGRESS);
/* qemu_aio_wait_all will execute the rest of the coroutine. */
/* aio_poll will execute the rest of the coroutine. */
while (data.ret == -EINPROGRESS) {
aio_poll(ctx, true);
......
......@@ -550,3 +550,16 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
return total;
}
void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
{
size_t total;
unsigned int niov = qiov->niov;
assert(qiov->size >= bytes);
total = iov_discard_back(qiov->iov, &niov, bytes);
assert(total == bytes);
qiov->niov = niov;
qiov->size -= bytes;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册