Commit e6ebbd46 authored by Peter Maydell

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches:

- file-posix: Fix shared permission locks after reopen
- block: Fix error path for failed .bdrv_reopen_prepare
- qcow2: Catch invalid allocations when the image becomes too large
- vvfat/fdc/nvme: Fix segfaults and leaks

# gpg: Signature made Mon 19 Nov 2018 14:28:18 GMT
# gpg:                using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  iotests: Test file-posix locking and reopen
  file-posix: Fix shared locks on reopen commit
  block: Always abort reopen after prepare succeeded
  iotests: Add new test 220 for max compressed cluster offset
  qcow2: Don't allow overflow during cluster allocation
  qcow2: Document some maximum size constraints
  vvfat: Fix memory leak
  fdc: fix segfault in fdctrl_stop_transfer() when DMA is disabled
  nvme: fix oob access issue(CVE-2018-16847)
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
@@ -3201,6 +3201,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
QDict *orig_reopen_opts;
char *discard = NULL;
bool read_only;
bool drv_prepared = false;
assert(reopen_state != NULL);
assert(reopen_state->bs->drv != NULL);
@@ -3285,6 +3286,8 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
goto error;
}
drv_prepared = true;
/* Options that are not handled are only okay if they are unchanged
* compared to the old state. It is expected that some options are only
* used for the initial open, but not reopen (e.g. filename) */
@@ -3350,6 +3353,15 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
reopen_state->options = qobject_ref(orig_reopen_opts);
error:
if (ret < 0 && drv_prepared) {
/* drv->bdrv_reopen_prepare() has succeeded, so we need to
* call drv->bdrv_reopen_abort() before signaling an error
* (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
* when the respective bdrv_reopen_prepare() has failed) */
if (drv->bdrv_reopen_abort) {
drv->bdrv_reopen_abort(reopen_state);
}
}
qemu_opts_del(opts);
qobject_unref(orig_reopen_opts);
g_free(discard);
@@ -959,7 +959,7 @@ static void raw_reopen_commit(BDRVReopenState *state)
/* Copy locks to the new fd before closing the old one. */
raw_apply_lock_bytes(NULL, rs->fd, s->locked_perm,
~s->locked_shared_perm, false, &local_err);
s->locked_shared_perm, false, &local_err);
if (local_err) {
/* shouldn't fail in a sane host, but report it just in case. */
error_report_err(local_err);
@@ -31,7 +31,8 @@
#include "qemu/bswap.h"
#include "qemu/cutils.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
uint64_t max);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length, uint64_t addend,
bool decrease, enum qcow2_discard_type type);
@@ -362,7 +363,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
/* Allocate the refcount block itself and mark it as used */
int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX);
if (new_block < 0) {
return new_block;
}
@@ -954,7 +955,8 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
/* return < 0 if error */
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
uint64_t max)
{
BDRVQcow2State *s = bs->opaque;
uint64_t i, nb_clusters, refcount;
@@ -979,9 +981,9 @@ retry:
}
/* Make sure that all offsets in the "allocated" range are representable
* in an int64_t */
* in the requested max */
if (s->free_cluster_index > 0 &&
s->free_cluster_index - 1 > (INT64_MAX >> s->cluster_bits))
s->free_cluster_index - 1 > (max >> s->cluster_bits))
{
return -EFBIG;
}
@@ -1001,7 +1003,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
do {
offset = alloc_clusters_noref(bs, size);
offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET);
if (offset < 0) {
return offset;
}
@@ -1083,7 +1085,11 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
do {
if (!offset || free_in_cluster < size) {
int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
int64_t new_cluster;
new_cluster = alloc_clusters_noref(bs, s->cluster_size,
MIN(s->cluster_offset_mask,
QCOW_MAX_CLUSTER_OFFSET));
if (new_cluster < 0) {
return new_cluster;
}
@@ -42,6 +42,12 @@
#define QCOW_MAX_CRYPT_CLUSTERS 32
#define QCOW_MAX_SNAPSHOTS 65536
/* Field widths in qcow2 mean normal cluster offsets cannot reach
* 64PB; depending on cluster size, compressed clusters can have a
* smaller limit (64PB for up to 16k clusters, then ramps down to
* 512TB for 2M clusters). */
#define QCOW_MAX_CLUSTER_OFFSET ((1ULL << 56) - 1)
/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
* (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
#define QCOW_MAX_REFTABLE_SIZE S_8MiB
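As a quick sanity check of the refcount-table numbers in the comment above (assuming the default refcount_order of 4, i.e. 16-bit refcounts): an 8 MB refcount table holds 8 MiB / 8 B = 2^20 refblock pointers, and each refblock is one cluster holding cluster_size / 2 refcounts, each of which covers one cluster. That gives 2^20 * 2^15 * 2^16 = 2^51 bytes (2 PB) with 64 KiB clusters, 2^20 * 2^8 * 2^9 = 2^37 bytes (128 GB) with 512-byte clusters, and 2^20 * 2^20 * 2^21 = 2^61 bytes (2 EB) with 2 MiB clusters.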
@@ -2510,7 +2510,7 @@ static int commit_one_file(BDRVVVFATState* s,
uint32_t first_cluster = c;
mapping_t* mapping = find_mapping_for_cluster(s, c);
uint32_t size = filesize_of_direntry(direntry);
char* cluster = g_malloc(s->cluster_size);
char *cluster;
uint32_t i;
int fd = 0;
@@ -2528,17 +2528,17 @@ static int commit_one_file(BDRVVVFATState* s,
if (fd < 0) {
fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path,
strerror(errno), errno);
g_free(cluster);
return fd;
}
if (offset > 0) {
if (lseek(fd, offset, SEEK_SET) != offset) {
qemu_close(fd);
g_free(cluster);
return -3;
}
}
cluster = g_malloc(s->cluster_size);
while (offset < size) {
uint32_t c1;
int rest_size = (size - offset > s->cluster_size ?
@@ -40,7 +40,18 @@ The first cluster of a qcow2 image contains the file header:
with larger cluster sizes.
24 - 31: size
Virtual disk size in bytes
Virtual disk size in bytes.
Note: qemu has an implementation limit of 32 MB as
the maximum L1 table size. With a 2 MB cluster
size, it is unable to populate a virtual cluster
beyond 2 EB (61 bits); with a 512 byte cluster
size, it is unable to populate a virtual size
larger than 128 GB (37 bits). Meanwhile, L1/L2
table layouts limit an image to no more than 64 PB
(56 bits) of populated clusters, and an image may
hit other limits first (such as a file system's
maximum size).
32 - 35: crypt_method
0 for no encryption
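To spell out the arithmetic behind the new note: a 32 MB L1 table holds 32 MiB / 8 B = 2^22 entries, each pointing to one L2 table; an L2 table is one cluster and holds cluster_size / 8 entries, each mapping one cluster. With 2 MiB clusters that is 2^22 * 2^18 * 2^21 = 2^61 bytes (2 EB) of virtual space that can be populated, and with 512-byte clusters it is 2^22 * 2^6 * 2^9 = 2^37 bytes (128 GB), matching the figures quoted above.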
@@ -326,6 +337,17 @@ in the image file.
It contains pointers to the second level structures which are called refcount
blocks and are exactly one cluster in size.
Although a large enough refcount table can reserve clusters past 64 PB
(56 bits) (assuming the underlying protocol can even be sized that
large), note that some qcow2 metadata such as L1/L2 tables must point
to clusters prior to that point.
Note: qemu has an implementation limit of 8 MB as the maximum refcount
table size. With a 2 MB cluster size and a default refcount_order of
4, it is unable to reference host resources beyond 2 EB (61 bits); in
the worst case, with a 512-byte cluster size and refcount_order of 6, it is
unable to access beyond 32 GB (35 bits).
Given an offset into the image file, the refcount of its cluster can be
obtained as follows:
@@ -365,6 +387,16 @@ The L1 table has a variable size (stored in the header) and may use multiple
clusters, however it must be contiguous in the image file. L2 tables are
exactly one cluster in size.
The L1 and L2 tables have implications on the maximum virtual file
size; for a given L1 table size, a larger cluster size is required for
the guest to have access to more space. Furthermore, a virtual
cluster must currently map to a host offset below 64 PB (56 bits)
(although this limit could be relaxed by putting reserved bits into
use). Additionally, as cluster size increases, the maximum host
offset for a compressed cluster is reduced (a 2M cluster size requires
compressed clusters to reside below 512 TB (49 bits), and this limit
cannot be relaxed without an incompatible layout change).
Given an offset into the virtual disk, the offset into the image file can be
obtained as follows:
@@ -427,7 +459,9 @@ Standard Cluster Descriptor:
Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)):
Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a
cluster or sector boundary!
cluster or sector boundary! If cluster_bits is
small enough that this field includes bits beyond
55, those upper bits must be set to 0.
x - 61: Number of additional 512-byte sectors used for the
compressed data, beyond the sector containing the offset
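The compressed cluster descriptor layout above (x = 62 - (cluster_bits - 8)) matches the limit that the MIN(s->cluster_offset_mask, QCOW_MAX_CLUSTER_OFFSET) bound added to qcow2_alloc_bytes() earlier in this series enforces. A small standalone sketch (illustrative only, not QEMU code) prints the resulting host-offset limit per cluster size:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    for (int cluster_bits = 9; cluster_bits <= 21; cluster_bits++) {
        /* Bits available for the host offset in a compressed L2 entry */
        int offset_bits = 62 - (cluster_bits - 8);
        /* Normal cluster offsets are capped at 2^56 - 1 regardless */
        int effective = offset_bits > 56 ? 56 : offset_bits;
        printf("cluster size %8u: compressed host offsets must stay below 2^%d\n",
               1u << cluster_bits, effective);
    }
    return 0;
}

For 2 MiB clusters this prints 2^49 (512 TB); for cluster sizes up to 16 KiB the generic 2^56 (64 PB) cap dominates, consistent with both notes above.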
@@ -1617,7 +1617,7 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
fdctrl->fifo[5] = cur_drv->sect;
fdctrl->fifo[6] = FD_SECTOR_SC;
fdctrl->data_dir = FD_DIR_READ;
if (!(fdctrl->msr & FD_MSR_NONDMA)) {
if (fdctrl->dma_chann != -1 && !(fdctrl->msr & FD_MSR_NONDMA)) {
IsaDmaClass *k = ISADMA_GET_CLASS(fdctrl->dma);
k->release_DREQ(fdctrl->dma, fdctrl->dma_chann);
}
@@ -1175,6 +1175,10 @@ static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data,
unsigned size)
{
NvmeCtrl *n = (NvmeCtrl *)opaque;
if (addr + size > NVME_CMBSZ_GETSIZE(n->bar.cmbsz)) {
return;
}
memcpy(&n->cmbuf[addr], &data, size);
}
@@ -1183,6 +1187,9 @@ static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size)
uint64_t val;
NvmeCtrl *n = (NvmeCtrl *)opaque;
if (addr + size > NVME_CMBSZ_GETSIZE(n->bar.cmbsz)) {
return 0;
}
memcpy(&val, &n->cmbuf[addr], size);
return val;
}
@@ -31,6 +31,7 @@ status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
rm -f "$TEST_IMG.overlay"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -71,6 +72,76 @@ echo 'quit' | $QEMU -nographic -monitor stdio \
_cleanup_qemu
echo
echo '=== Testing reopen ==='
echo
# This tests that reopening does not unshare any permissions it should
# not unshare
# (There was a bug where reopening shared exactly the opposite of the
# permissions it was supposed to share)
_launch_qemu
_send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'qmp_capabilities'}" \
'return'
# Open the image without any format layer (we are not going to access
# it, so that is fine)
# This should keep all permissions shared.
success_or_failure=y _send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'blockdev-add',
'arguments': {
'node-name': 'node0',
'driver': 'file',
'filename': '$TEST_IMG',
'locking': 'on'
} }" \
'return' \
'error'
# This snapshot will perform a reopen to drop R/W to RO.
# It should still keep all permissions shared.
success_or_failure=y _send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'blockdev-snapshot-sync',
'arguments': {
'node-name': 'node0',
'snapshot-file': '$TEST_IMG.overlay',
'snapshot-node-name': 'node1'
} }" \
'return' \
'error'
# Now open the same file again
# This does not require any permissions (and does not unshare any), so
# this will not conflict with node0.
success_or_failure=y _send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'blockdev-add',
'arguments': {
'node-name': 'node1',
'driver': 'file',
'filename': '$TEST_IMG',
'locking': 'on'
} }" \
'return' \
'error'
# Now we attach the image to a virtio-blk device. This device does
# require some permissions (at least WRITE and READ_CONSISTENT), so if
# reopening node0 unshared any (which it should not have), this will
# fail (but it should not).
success_or_failure=y _send_qemu_cmd $QEMU_HANDLE \
"{'execute': 'device_add',
'arguments': {
'driver': 'virtio-blk',
'drive': 'node1'
} }" \
'return' \
'error'
_cleanup_qemu
# success, all done
echo "*** done"
rm -f $seq.full
@@ -5,4 +5,13 @@ Starting QEMU
Starting a second QEMU using the same image should fail
QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,id=drive0,file.locking=on: Failed to get "write" lock
Is another process using the image [TEST_DIR/t.qcow2]?
=== Testing reopen ===
{"return": {}}
{"return": {}}
Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 size=197120 backing_file=TEST_DIR/t.qcow2 backing_fmt=file cluster_size=65536 lazy_refcounts=off refcount_bits=16
{"return": {}}
{"return": {}}
{"return": {}}
*** done
#!/bin/bash
#
# max limits on compression in huge qcow2 files
#
# Copyright (C) 2018 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
seq=$(basename $0)
echo "QA output created by $seq"
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
. ./common.pattern
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
echo "== Creating huge file =="
# Sanity check: We require a file system that permits the creation
# of a HUGE (but very sparse) file. tmpfs works, ext4 does not.
if ! truncate --size=513T "$TEST_IMG"; then
_notrun "file system on $TEST_DIR does not support large enough files"
fi
rm "$TEST_IMG"
IMGOPTS='cluster_size=2M,refcount_bits=1' _make_test_img 513T
echo "== Populating refcounts =="
# We want an image with 256M refcounts * 2M clusters = 512T referenced.
# Each 2M cluster holds 16M refcounts; the refcount table initially uses
# 1 refblock, so we need to add 15 more. The refcount table lives at 2M,
# first refblock at 4M, L2 at 6M, so our remaining additions start at 8M.
# Then, for each refblock, mark it as fully populated.
to_hex() {
printf %016x\\n $1 | sed 's/\(..\)/\\x\1/g'
}
truncate --size=38m "$TEST_IMG"
entry=$((0x200000))
$QEMU_IO_PROG -f raw -c "w -P 0xff 4m 2m" "$TEST_IMG" | _filter_qemu_io
for i in {1..15}; do
offs=$((0x600000 + i*0x200000))
poke_file "$TEST_IMG" $((i*8 + entry)) $(to_hex $offs)
$QEMU_IO_PROG -f raw -c "w -P 0xff $offs 2m" "$TEST_IMG" | _filter_qemu_io
done
echo "== Checking file before =="
# FIXME: 'qemu-img check' doesn't diagnose refcounts beyond the end of
# the file as leaked clusters
_check_test_img 2>&1 | sed '/^Leaked cluster/d'
stat -c 'image size %s' "$TEST_IMG"
echo "== Trying to write compressed cluster =="
# Given our file size, the next available cluster at 512T lies beyond the
# maximum offset that a compressed 2M cluster can reside in
$QEMU_IO_PROG -c 'w -c 0 2m' "$TEST_IMG" | _filter_qemu_io
# The attempt failed, but ended up allocating a new refblock
stat -c 'image size %s' "$TEST_IMG"
echo "== Writing normal cluster =="
# The failed write should not corrupt the image, so a normal write succeeds
$QEMU_IO_PROG -c 'w 0 2m' "$TEST_IMG" | _filter_qemu_io
echo "== Checking file after =="
# qemu-img now sees the millions of leaked clusters, thanks to the allocations
# at 512T. Undo many of our faked references to speed up the check.
$QEMU_IO_PROG -f raw -c "w -z 5m 1m" -c "w -z 8m 30m" "$TEST_IMG" |
_filter_qemu_io
_check_test_img 2>&1 | sed '/^Leaked cluster/d'
# success, all done
echo "*** done"
rm -f $seq.full
status=0
QA output created by 220
== Creating huge file ==
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=564049465049088
== Populating refcounts ==
wrote 2097152/2097152 bytes at offset 4194304
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 8388608
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 10485760
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 12582912
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 14680064
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 16777216
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 18874368
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 20971520
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 23068672
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 25165824
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 27262976
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 29360128
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 31457280
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 33554432
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 35651584
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 2097152/2097152 bytes at offset 37748736
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== Checking file before ==
No errors were found on the image.
image size 39845888
== Trying to write compressed cluster ==
write failed: Input/output error
image size 562949957615616
== Writing normal cluster ==
wrote 2097152/2097152 bytes at offset 0
2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== Checking file after ==
wrote 1048576/1048576 bytes at offset 5242880
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 31457280/31457280 bytes at offset 8388608
30 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
8388589 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
*** done
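To spell out the refcount arithmetic that test 220 relies on: with refcount_bits=1, a single 2 MiB refblock holds 2 MiB * 8 = 16 M one-bit refcounts, each covering one 2 MiB cluster, i.e. 32 TiB of host offsets per refblock. The initial refblock plus the 15 faked ones therefore mark 16 * 32 TiB = 512 TiB as in use, so the next free cluster sits at 512 TiB, just past the 2^49 limit for compressed 2 MiB clusters; that is why the compressed write in the output above fails with an I/O error while the normal write still succeeds.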
@@ -219,6 +219,7 @@
217 rw auto quick
218 rw auto quick
219 rw auto
220 rw auto
221 rw auto quick
222 rw auto quick
223 rw auto quick