提交 a6f2cb03 编写于 作者: P Peter Maydell

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

# gpg: Signature made Thu Apr  9 10:55:11 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  block/iscsi: handle zero events from iscsi_which_events
  aio: strengthen memory barriers for bottom half scheduling
  virtio-blk: correctly dirty guest memory
  qcow2: Fix header update with overridden backing file
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
...@@ -72,12 +72,13 @@ int aio_bh_poll(AioContext *ctx) ...@@ -72,12 +72,13 @@ int aio_bh_poll(AioContext *ctx)
/* Make sure that fetching bh happens before accessing its members */ /* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends(); smp_read_barrier_depends();
next = bh->next; next = bh->next;
if (!bh->deleted && bh->scheduled) { /* The atomic_xchg is paired with the one in qemu_bh_schedule. The
bh->scheduled = 0; * implicit memory barrier ensures that the callback sees all writes
/* Paired with write barrier in bh schedule to ensure reading for * done by the scheduling thread. It also ensures that the scheduling
* idle & callbacks coming after bh's scheduling. * thread sees the zero before bh->cb has run, and thus will call
* aio_notify again if necessary.
*/ */
smp_rmb(); if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
if (!bh->idle) if (!bh->idle)
ret = 1; ret = 1;
bh->idle = 0; bh->idle = 0;
...@@ -108,33 +109,28 @@ int aio_bh_poll(AioContext *ctx) ...@@ -108,33 +109,28 @@ int aio_bh_poll(AioContext *ctx)
void qemu_bh_schedule_idle(QEMUBH *bh) void qemu_bh_schedule_idle(QEMUBH *bh)
{ {
if (bh->scheduled)
return;
bh->idle = 1; bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done /* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll. * before the locations are read in the aio_bh_poll.
*/ */
smp_wmb(); atomic_mb_set(&bh->scheduled, 1);
bh->scheduled = 1;
} }
void qemu_bh_schedule(QEMUBH *bh) void qemu_bh_schedule(QEMUBH *bh)
{ {
AioContext *ctx; AioContext *ctx;
if (bh->scheduled)
return;
ctx = bh->ctx; ctx = bh->ctx;
bh->idle = 0; bh->idle = 0;
/* Make sure that: /* The memory barrier implicit in atomic_xchg makes sure that:
* 1. idle & any writes needed by the callback are done before the * 1. idle & any writes needed by the callback are done before the
* locations are read in the aio_bh_poll. * locations are read in the aio_bh_poll.
* 2. ctx is loaded before scheduled is set and the callback has a chance * 2. ctx is loaded before scheduled is set and the callback has a chance
* to execute. * to execute.
*/ */
smp_mb(); if (atomic_xchg(&bh->scheduled, 1) == 0) {
bh->scheduled = 1;
aio_notify(ctx); aio_notify(ctx);
}
} }
......
...@@ -56,6 +56,7 @@ typedef struct IscsiLun { ...@@ -56,6 +56,7 @@ typedef struct IscsiLun {
uint64_t num_blocks; uint64_t num_blocks;
int events; int events;
QEMUTimer *nop_timer; QEMUTimer *nop_timer;
QEMUTimer *event_timer;
uint8_t lbpme; uint8_t lbpme;
uint8_t lbprz; uint8_t lbprz;
uint8_t has_write_same; uint8_t has_write_same;
...@@ -95,6 +96,7 @@ typedef struct IscsiAIOCB { ...@@ -95,6 +96,7 @@ typedef struct IscsiAIOCB {
#endif #endif
} IscsiAIOCB; } IscsiAIOCB;
#define EVENT_INTERVAL 250
#define NOP_INTERVAL 5000 #define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3 #define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
...@@ -256,21 +258,30 @@ static void ...@@ -256,21 +258,30 @@ static void
iscsi_set_events(IscsiLun *iscsilun) iscsi_set_events(IscsiLun *iscsilun)
{ {
struct iscsi_context *iscsi = iscsilun->iscsi; struct iscsi_context *iscsi = iscsilun->iscsi;
int ev; int ev = iscsi_which_events(iscsi);
/* We always register a read handler. */
ev = POLLIN;
ev |= iscsi_which_events(iscsi);
if (ev != iscsilun->events) { if (ev != iscsilun->events) {
aio_set_fd_handler(iscsilun->aio_context, aio_set_fd_handler(iscsilun->aio_context,
iscsi_get_fd(iscsi), iscsi_get_fd(iscsi),
iscsi_process_read, (ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL, (ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun); iscsilun);
iscsilun->events = ev;
}
/* newer versions of libiscsi may return zero events. In this
* case start a timer to ensure we are able to return to service
* once this situation changes. */
if (!ev) {
timer_mod(iscsilun->event_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
} }
}
iscsilun->events = ev; static void iscsi_timed_set_events(void *opaque)
{
IscsiLun *iscsilun = opaque;
iscsi_set_events(iscsilun);
} }
static void static void
...@@ -1214,6 +1225,11 @@ static void iscsi_detach_aio_context(BlockDriverState *bs) ...@@ -1214,6 +1225,11 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
timer_free(iscsilun->nop_timer); timer_free(iscsilun->nop_timer);
iscsilun->nop_timer = NULL; iscsilun->nop_timer = NULL;
} }
if (iscsilun->event_timer) {
timer_del(iscsilun->event_timer);
timer_free(iscsilun->event_timer);
iscsilun->event_timer = NULL;
}
} }
static void iscsi_attach_aio_context(BlockDriverState *bs, static void iscsi_attach_aio_context(BlockDriverState *bs,
...@@ -1230,6 +1246,11 @@ static void iscsi_attach_aio_context(BlockDriverState *bs, ...@@ -1230,6 +1246,11 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
iscsi_nop_timed_event, iscsilun); iscsi_nop_timed_event, iscsilun);
timer_mod(iscsilun->nop_timer, timer_mod(iscsilun->nop_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
/* Prepare a timer for a delayed call to iscsi_set_events */
iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
QEMU_CLOCK_REALTIME, SCALE_MS,
iscsi_timed_set_events, iscsilun);
} }
static bool iscsi_is_write_protected(IscsiLun *iscsilun) static bool iscsi_is_write_protected(IscsiLun *iscsilun)
......
...@@ -140,6 +140,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, ...@@ -140,6 +140,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
return 3; return 3;
} }
bs->backing_format[ext.len] = '\0'; bs->backing_format[ext.len] = '\0';
s->image_backing_format = g_strdup(bs->backing_format);
#ifdef DEBUG_EXT #ifdef DEBUG_EXT
printf("Qcow2: Got format extension %s\n", bs->backing_format); printf("Qcow2: Got format extension %s\n", bs->backing_format);
#endif #endif
...@@ -884,6 +885,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, ...@@ -884,6 +885,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail; goto fail;
} }
bs->backing_file[len] = '\0'; bs->backing_file[len] = '\0';
s->image_backing_file = g_strdup(bs->backing_file);
} }
/* Internal snapshots */ /* Internal snapshots */
...@@ -1457,6 +1459,9 @@ static void qcow2_close(BlockDriverState *bs) ...@@ -1457,6 +1459,9 @@ static void qcow2_close(BlockDriverState *bs)
g_free(s->unknown_header_fields); g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs); cleanup_unknown_header_ext(bs);
g_free(s->image_backing_file);
g_free(s->image_backing_format);
g_free(s->cluster_cache); g_free(s->cluster_cache);
qemu_vfree(s->cluster_data); qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs); qcow2_refcount_close(bs);
...@@ -1622,9 +1627,10 @@ int qcow2_update_header(BlockDriverState *bs) ...@@ -1622,9 +1627,10 @@ int qcow2_update_header(BlockDriverState *bs)
} }
/* Backing file format header extension */ /* Backing file format header extension */
if (*bs->backing_format) { if (s->image_backing_format) {
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
bs->backing_format, strlen(bs->backing_format), s->image_backing_format,
strlen(s->image_backing_format),
buflen); buflen);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
...@@ -1682,8 +1688,8 @@ int qcow2_update_header(BlockDriverState *bs) ...@@ -1682,8 +1688,8 @@ int qcow2_update_header(BlockDriverState *bs)
buflen -= ret; buflen -= ret;
/* Backing file name */ /* Backing file name */
if (*bs->backing_file) { if (s->image_backing_file) {
size_t backing_file_len = strlen(bs->backing_file); size_t backing_file_len = strlen(s->image_backing_file);
if (buflen < backing_file_len) { if (buflen < backing_file_len) {
ret = -ENOSPC; ret = -ENOSPC;
...@@ -1691,7 +1697,7 @@ int qcow2_update_header(BlockDriverState *bs) ...@@ -1691,7 +1697,7 @@ int qcow2_update_header(BlockDriverState *bs)
} }
/* Using strncpy is ok here, since buf is not NUL-terminated. */ /* Using strncpy is ok here, since buf is not NUL-terminated. */
strncpy(buf, bs->backing_file, buflen); strncpy(buf, s->image_backing_file, buflen);
header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
header->backing_file_size = cpu_to_be32(backing_file_len); header->backing_file_size = cpu_to_be32(backing_file_len);
...@@ -1712,9 +1718,17 @@ fail: ...@@ -1712,9 +1718,17 @@ fail:
static int qcow2_change_backing_file(BlockDriverState *bs, static int qcow2_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt) const char *backing_file, const char *backing_fmt)
{ {
BDRVQcowState *s = bs->opaque;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
g_free(s->image_backing_file);
g_free(s->image_backing_format);
s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
return qcow2_update_header(bs); return qcow2_update_header(bs);
} }
...@@ -2751,8 +2765,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, ...@@ -2751,8 +2765,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
} }
if (backing_file || backing_format) { if (backing_file || backing_format) {
ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file, ret = qcow2_change_backing_file(bs,
backing_format ?: bs->backing_format); backing_file ?: s->image_backing_file,
backing_format ?: s->image_backing_format);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
......
...@@ -283,6 +283,12 @@ typedef struct BDRVQcowState { ...@@ -283,6 +283,12 @@ typedef struct BDRVQcowState {
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards; QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards; bool cache_discards;
/* Backing file path and format as stored in the image (this is not the
* effective path/format, which may be the result of a runtime option
* override) */
char *image_backing_file;
char *image_backing_format;
} BDRVQcowState; } BDRVQcowState;
struct QCowAIOCB; struct QCowAIOCB;
......
...@@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) ...@@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
VirtIOBlockDataPlane *s = req->dev->dataplane; VirtIOBlockDataPlane *s = req->dev->dataplane;
stb_p(&req->in->status, status); stb_p(&req->in->status, status);
vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
req->qiov.size + sizeof(*req->in));
/* Suppress notification to guest by BH and its scheduled /* Suppress notification to guest by BH and its scheduled
* flag because requests are completed as a batch after io * flag because requests are completed as a batch after io
......
...@@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) ...@@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
req->dev = s; req->dev = s;
req->qiov.size = 0; req->qiov.size = 0;
req->in_len = 0;
req->next = NULL; req->next = NULL;
req->mr_next = NULL; req->mr_next = NULL;
return req; return req;
...@@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, ...@@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
trace_virtio_blk_req_complete(req, status); trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status); stb_p(&req->in->status, status);
virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); virtqueue_push(s->vq, &req->elem, req->in_len);
virtio_notify(vdev, s->vq); virtio_notify(vdev, s->vq);
} }
...@@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) ...@@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
if (ret) { if (ret) {
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT); bool is_read = !(p & VIRTIO_BLK_T_OUT);
/* Note that memory may be dirtied on read failure. If the
* virtio request is not completed here, as is the case for
* BLOCK_ERROR_ACTION_STOP, the memory may not be copied
* correctly during live migration. While this is ugly,
* it is acceptable because the device is free to write to
* the memory until the request is completed (which will
* happen on the other side of the migration).
*/
if (virtio_blk_handle_rw_error(req, -ret, is_read)) { if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
continue; continue;
} }
...@@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) ...@@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
exit(1); exit(1);
} }
/* We always touch the last byte, so just see how big in_iov is. */
req->in_len = iov_size(in_iov, in_num);
req->in = (void *)in_iov[in_num - 1].iov_base req->in = (void *)in_iov[in_num - 1].iov_base
+ in_iov[in_num - 1].iov_len + in_iov[in_num - 1].iov_len
- sizeof(struct virtio_blk_inhdr); - sizeof(struct virtio_blk_inhdr);
......
...@@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { ...@@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
struct virtio_blk_inhdr *in; struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out; struct virtio_blk_outhdr out;
QEMUIOVector qiov; QEMUIOVector qiov;
size_t in_len;
struct VirtIOBlockReq *next; struct VirtIOBlockReq *next;
struct VirtIOBlockReq *mr_next; struct VirtIOBlockReq *mr_next;
BlockAcctCookie acct; BlockAcctCookie acct;
......
#!/bin/bash
#
# Test that temporary backing file overrides (on the command line or in
# blockdev-add) don't replace the original path stored in the image during
# header updates.
#
# Copyright (C) 2015 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Test creator / owner
owner=kwolf@redhat.com
# Name of this script; it is printed as the first line of the test output.
seq="$(basename $0)"
echo "QA output created by $seq"
# NOTE(review): 'here' and 'tmp' are not referenced again in this script;
# presumably they are consumed by the common.* files sourced below - confirm.
here="$PWD"
tmp=/tmp/$$
status=1 # failure is the default!
# Cleanup hook run from the trap below; delegates to _cleanup_test_img
# (not defined in this script, presumably provided by the sourced common files).
_cleanup()
{
_cleanup_test_img
}
# On shell exit (0) and on signals 1 (HUP), 2 (INT), 3 (QUIT) and 15 (TERM):
# run the cleanup hook, then exit with whatever $status holds at that time.
# The escaped \$status defers expansion until the trap actually fires.
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
. ./common.qemu
# Declare the configurations this test applies to: qcow2 image format,
# generic protocol, Linux host.
_supported_fmt qcow2
_supported_proto generic
_supported_os Linux
# NOTE(review): presumably tells the common.qemu helpers to talk to QEMU via
# the human monitor (HMP) - confirm against common.qemu.
qemu_comm_method="monitor"
# Create three 64M images: "$TEST_IMG.orig" and "$TEST_IMG.base" (used below as
# the stored backing file and the runtime override, respectively) and the
# test image itself, initially without a backing file.
TEST_IMG="$TEST_IMG.orig" _make_test_img 64M
TEST_IMG="$TEST_IMG.base" _make_test_img 64M
_make_test_img 64M
# Print the filtered image info as the baseline before any header update.
_img_info | _filter_img_info
echo
echo "=== HMP commit ==="
echo
# bdrv_make_empty() involves a header update for qcow2
# Test that a backing file isn't written
# The image has no backing file stored; "$TEST_IMG.base" is supplied only as a
# runtime override on the -drive option.
_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base"
# Issue the HMP "commit" command for ide0-hd0, then send an empty command;
# the last argument "(qemu)" is presumably the response to wait for - confirm.
_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
_cleanup_qemu
# Show the image info again so any backing file written by the header update
# becomes visible in the test output.
_img_info | _filter_img_info
# Make sure that if there was a backing file that was just overridden on the
# command line, that backing file is retained, with the right format
# Recreate the test image with "$TEST_IMG.orig" (format raw) stored as its
# backing file, then override it at runtime with "$TEST_IMG.base" / $IMGFMT.
_make_test_img -F raw -b "$TEST_IMG.orig" 64M
_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT
_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
_cleanup_qemu
_img_info | _filter_img_info
echo
echo "=== Marking image dirty (lazy refcounts) ==="
echo
# Test that a backing file isn't written
# Same scenario as above, but the header update is triggered through qemu-io:
# the image is opened with lazy-refcounts=on and a 4k write is issued at
# offset 0, with the backing file overridden only via the open options.
_make_test_img 64M
$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
_img_info | _filter_img_info
# Make sure that if there was a backing file that was just overridden on the
# command line, that backing file is retained, with the right format
_make_test_img -F raw -b "$TEST_IMG.orig" 64M
$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,backing.driver=$IMGFMT,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
_img_info | _filter_img_info
# success, all done
echo '*** done'
# NOTE(review): "$seq.full" is presumably a verbose log kept by the test
# harness; it is removed here on success - confirm.
rm -f $seq.full
# Setting status=0 makes the exit trap report success.
status=0
QA output created by 130
Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864
Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
=== HMP commit ===
QEMU X.Y.Z monitor - type 'help' for more information
(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
(qemu)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
QEMU X.Y.Z monitor - type 'help' for more information
(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
(qemu)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
backing file: TEST_DIR/t.IMGFMT.orig
backing file format: raw
=== Marking image dirty (lazy refcounts) ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
wrote 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
backing file: TEST_DIR/t.IMGFMT.orig
backing file format: raw
*** done
...@@ -124,3 +124,4 @@ ...@@ -124,3 +124,4 @@
121 rw auto 121 rw auto
123 rw auto quick 123 rw auto quick
128 rw auto quick 128 rw auto quick
130 rw auto quick
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册