提交 69f47505 编写于 作者: V Vladimir Sementsov-Ogievskiy 提交者: Kevin Wolf

block: avoid recursive block_status call if possible

drv_co_block_status digs bs->file for additional, more accurate search
for hole inside region, reported as DATA by bs since 5daa74a6.

This accuracy is not free: assume we have qcow2 disk. Actually, qcow2
knows, where are holes and where is data. But every block_status
request calls lseek additionally. Assume a big disk, full of
data, in any iterative copying block job (or img convert) we'll call
lseek(HOLE) on every iteration, and each of these lseeks will have to
iterate through all metadata up to the end of file. It's obviously
ineffective behavior. And for many scenarios we don't need this lseek
at all.

However, lseek is needed when we have metadata-preallocated image.

So, let's detect metadata-preallocation case and don't dig qcow2's
protocol file in other cases.

The idea is to compare allocation size in POV of filesystem with
allocations size in POV of Qcow2 (by refcounts). If allocation in fs is
significantly lower, consider it as metadata-preallocation case.

102 iotest changed, as our detector can't detect shrinked file as
metadata-preallocation, which don't seem to be wrong, as with metadata
preallocation we always have valid file length.

Two other iotests have a slight change in their QMP output sequence:
Active 'block-commit' returns earlier because the job coroutine yields
earlier on a blocking operation. This operation is loading the refcount
blocks in qcow2_detect_metadata_preallocation().
Suggested-by: NDenis V. Lunev <den@openvz.org>
Signed-off-by: NVladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: NKevin Wolf <kwolf@redhat.com>
上级 52f2b896
...@@ -2092,6 +2092,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, ...@@ -2092,6 +2092,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
*/ */
assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) && assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
align > offset - aligned_offset); align > offset - aligned_offset);
if (ret & BDRV_BLOCK_RECURSE) {
assert(ret & BDRV_BLOCK_DATA);
assert(ret & BDRV_BLOCK_OFFSET_VALID);
assert(!(ret & BDRV_BLOCK_ZERO));
}
*pnum -= offset - aligned_offset; *pnum -= offset - aligned_offset;
if (*pnum > bytes) { if (*pnum > bytes) {
*pnum = bytes; *pnum = bytes;
...@@ -2122,7 +2128,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, ...@@ -2122,7 +2128,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
} }
} }
if (want_zero && local_file && local_file != bs && if (want_zero && ret & BDRV_BLOCK_RECURSE &&
local_file && local_file != bs &&
(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
(ret & BDRV_BLOCK_OFFSET_VALID)) { (ret & BDRV_BLOCK_OFFSET_VALID)) {
int64_t file_pnum; int64_t file_pnum;
......
...@@ -3444,3 +3444,35 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) ...@@ -3444,3 +3444,35 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
"There are no references in the refcount table."); "There are no references in the refcount table.");
return -EIO; return -EIO;
} }
int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
int64_t i, end_cluster, cluster_count = 0, threshold;
int64_t file_length, real_allocation, real_clusters;
file_length = bdrv_getlength(bs->file->bs);
if (file_length < 0) {
return file_length;
}
real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
if (real_allocation < 0) {
return real_allocation;
}
real_clusters = real_allocation / s->cluster_size;
threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
end_cluster = size_to_clusters(s, file_length);
for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
uint64_t refcount;
int ret = qcow2_get_refcount(bs, i, &refcount);
if (ret < 0) {
return ret;
}
cluster_count += !!refcount;
}
return cluster_count >= threshold;
}
...@@ -1895,6 +1895,12 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, ...@@ -1895,6 +1895,12 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
unsigned int bytes; unsigned int bytes;
int status = 0; int status = 0;
if (!s->metadata_preallocation_checked) {
ret = qcow2_detect_metadata_preallocation(bs);
s->metadata_preallocation = (ret == 1);
s->metadata_preallocation_checked = true;
}
bytes = MIN(INT_MAX, count); bytes = MIN(INT_MAX, count);
qemu_co_mutex_lock(&s->lock); qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
...@@ -1917,6 +1923,11 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, ...@@ -1917,6 +1923,11 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
} else if (ret != QCOW2_CLUSTER_UNALLOCATED) { } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
status |= BDRV_BLOCK_DATA; status |= BDRV_BLOCK_DATA;
} }
if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
(status & BDRV_BLOCK_OFFSET_VALID))
{
status |= BDRV_BLOCK_RECURSE;
}
return status; return status;
} }
......
...@@ -356,6 +356,9 @@ typedef struct BDRVQcow2State { ...@@ -356,6 +356,9 @@ typedef struct BDRVQcow2State {
int nb_threads; int nb_threads;
BdrvChild *data_file; BdrvChild *data_file;
bool metadata_preallocation_checked;
bool metadata_preallocation;
} BDRVQcow2State; } BDRVQcow2State;
typedef struct Qcow2COWRegion { typedef struct Qcow2COWRegion {
...@@ -655,6 +658,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, ...@@ -655,6 +658,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
void *cb_opaque, Error **errp); void *cb_opaque, Error **errp);
int qcow2_shrink_reftable(BlockDriverState *bs); int qcow2_shrink_reftable(BlockDriverState *bs);
int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
int qcow2_detect_metadata_preallocation(BlockDriverState *bs);
/* qcow2-cluster.c functions */ /* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
......
...@@ -156,10 +156,15 @@ typedef struct HDGeometry { ...@@ -156,10 +156,15 @@ typedef struct HDGeometry {
* BDRV_BLOCK_EOF: the returned pnum covers through end of file for this * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
* layer, set by block layer * layer, set by block layer
* *
* Internal flag: * Internal flags:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned * that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
* BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
* zeroes in file child of current block node inside
* returned region. Only valid together with both
* BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
* appear with BDRV_BLOCK_ZERO.
* *
* If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
* host offset within the returned BDS that is allocated for the * host offset within the returned BDS that is allocated for the
...@@ -184,6 +189,7 @@ typedef struct HDGeometry { ...@@ -184,6 +189,7 @@ typedef struct HDGeometry {
#define BDRV_BLOCK_RAW 0x08 #define BDRV_BLOCK_RAW 0x08
#define BDRV_BLOCK_ALLOCATED 0x10 #define BDRV_BLOCK_ALLOCATED 0x10
#define BDRV_BLOCK_EOF 0x20 #define BDRV_BLOCK_EOF 0x20
#define BDRV_BLOCK_RECURSE 0x40
#define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK #define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK
typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
......
...@@ -55,7 +55,7 @@ $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io ...@@ -55,7 +55,7 @@ $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024)) $QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024))
$QEMU_IO -c map "$TEST_IMG" $QEMU_IO -c map "$TEST_IMG"
$QEMU_IMG map "$TEST_IMG" $QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
echo echo
echo '=== Testing map on an image file truncated outside of qemu ===' echo '=== Testing map on an image file truncated outside of qemu ==='
......
...@@ -7,7 +7,8 @@ wrote 65536/65536 bytes at offset 0 ...@@ -7,7 +7,8 @@ wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
Image resized. Image resized.
64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0) 64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0)
Offset Length Mapped to File Offset Length File
0 0x10000 TEST_DIR/t.IMGFMT
=== Testing map on an image file truncated outside of qemu === === Testing map on an image file truncated outside of qemu ===
......
...@@ -42,9 +42,9 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. ...@@ -42,9 +42,9 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
{"return": {}} {"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}}
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
{"return": {}} {"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
......
...@@ -14,10 +14,10 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 size=536870912 backing_file=TEST_DIR/ ...@@ -14,10 +14,10 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 size=536870912 backing_file=TEST_DIR/
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "virtio0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "virtio0"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "virtio0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
{"return": {}} {"return": {}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "virtio0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "virtio0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册