From 0282ca45a0a4c384226d526d4878cacd77936ef4 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Tue, 16 Dec 2014 23:18:51 -0700 Subject: [PATCH] qemu: fix bugs in blockstats The documentation for virDomainBlockInfo was confusing: it stated that 'physical' was the size of the container, then gave an example of it being the amount of storage used by a sparse file (that is, for a sparse raw image on a regular file, the wording implied capacity==physical, while allocation was smaller; but the example instead claimed physical==allocation). Since we use 'physical' for the last offset of a block device, we should do likewise for regular files. Furthermore, the example claimed that for a qcow2 regular file, allocation==physical. At the time the code was first written, this was true (qcow2 files were allocated sequentially, and were never sparse, so the last sector written happened to also match the disk space occupied); but modern qemu does much better and can punch holes for a qcow2 with allocation < physical. Basically, after this patch, the three fields are now reliably mapped as: 'capacity' - how much storage the guest can see (equal to physical for raw images, determined by image metadata otherwise) 'allocation' - how much storage the image occupies (similar to what 'du' would report) 'physical' - the last offset of the image (similar to what 'ls' would report) 'capacity' can be larger than 'physical' (such as for a qcow2 image that does not vary much from a backing file) or smaller (such as for a qcow2 file with lots of internal snapshots). Likewise, 'allocation' can be (slightly) larger than 'physical' (such as counting the tail of cluster allocations required to round a file size up to filesystem granularity) or smaller (for a sparse file). A block-resize operation changes capacity (which, for raw images, also changes physical); many non-raw images automatically grow physical and allocation as necessary when starting with an allocation smaller than capacity; and even when capacity and physical stay unchanged, allocation can change when converting sectors from holes to data or back. Note that this does not change semantics for qcow2 images stored on block devices; there, we still rely on qemu to report the highest written extent for allocation. So using this API to track when to extend a block device because a qcow2 image is about to exceed a threshold will not see any changes. Also, note that virStorageVolInfo is unfortunately limited to just 'capacity' and 'allocation' (we can't expand it to add 'physical', although we can expand the XML to add it there); historically, that struct's 'allocation' value has reported file size for qcow2 files (what this patch terms 'physical' for a domain block device), but disk usage for raw files (what this patch terms 'allocation'). So follow-up patches will be needed to make storage volumes report the same allocation values and get at physical values, where those differ. * include/libvirt/libvirt-domain.h (_virDomainBlockInfo): Tweak documentation to match saner definition. * src/qemu/qemu_driver.c (qemuDomainGetBlockInfo): For regular files, physical size is capacity, not allocation. Signed-off-by: Eric Blake --- include/libvirt/libvirt-domain.h | 23 +++++++++++------- src/qemu/qemu_driver.c | 41 ++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index ae2c49c75a..baef32d809 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -1356,7 +1356,7 @@ int virDomainBlockResize (virDomainPtr dom, /** virDomainBlockInfo: * * This struct provides information about the size of a block device - * backing store + * backing store. * * Examples: * @@ -1364,13 +1364,13 @@ int virDomainBlockResize (virDomainPtr dom, * * capacity, allocation, physical: All the same * * - Sparse raw file in filesystem: - * * capacity: logical size of the file - * * allocation, physical: number of blocks allocated to file + * * capacity, size: logical size of the file + * * allocation: disk space occupied by file * * - qcow2 file in filesystem * * capacity: logical size from qcow2 header - * * allocation, physical: logical size of the file / - * highest qcow extent (identical) + * * allocation: disk space occupied by file + * * physical: reported size of qcow2 file * * - qcow2 file in a block device * * capacity: logical size from qcow2 header @@ -1380,9 +1380,16 @@ int virDomainBlockResize (virDomainPtr dom, typedef struct _virDomainBlockInfo virDomainBlockInfo; typedef virDomainBlockInfo *virDomainBlockInfoPtr; struct _virDomainBlockInfo { - unsigned long long capacity; /* logical size in bytes of the block device backing image */ - unsigned long long allocation; /* highest allocated extent in bytes of the block device backing image */ - unsigned long long physical; /* physical size in bytes of the container of the backing image */ + unsigned long long capacity; /* logical size in bytes of the + * image (how much storage the + * guest will see) */ + unsigned long long allocation; /* host storage in bytes occupied + * by the image (such as highest + * allocated extent if there are no + * holes, similar to 'du') */ + unsigned long long physical; /* host physical size in bytes of + * the image container (last + * offset, similar to 'ls')*/ }; int virDomainGetBlockInfo(virDomainPtr dom, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 3d81a5b3fe..f7373a5a68 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -11118,18 +11118,21 @@ qemuDomainGetBlockInfo(virDomainPtr dom, /* Get info for normal formats */ if (S_ISREG(sb.st_mode) || fd == -1) { #ifndef WIN32 - src->physical = (unsigned long long)sb.st_blocks * + src->allocation = (unsigned long long)sb.st_blocks * (unsigned long long)DEV_BSIZE; #else - src->physical = sb.st_size; + src->allocation = sb.st_size; #endif - /* Regular files may be sparse, so logical size (capacity) is not same - * as actual physical above - */ - src->capacity = sb.st_size; + /* Allocation tracks when the file is sparse, physical is the + * last offset of the file. */ + src->physical = sb.st_size; } else { - /* NB. Because we configure with AC_SYS_LARGEFILE, off_t should - * be 64 bits on all platforms. + /* NB. Because we configure with AC_SYS_LARGEFILE, off_t + * should be 64 bits on all platforms. For block devices, we + * have to seek (safe even if someone else is writing) to + * determine physical size, and assume that allocation is the + * same as physical (but can refine that assumption later if + * qemu is still running). */ end = lseek(fd, 0, SEEK_END); if (end == (off_t)-1) { @@ -11138,12 +11141,12 @@ qemuDomainGetBlockInfo(virDomainPtr dom, goto endjob; } src->physical = end; - src->capacity = end; + src->allocation = end; } - /* If the file we probed has a capacity set, then override - * what we calculated from file/block extents */ - /* Probe for magic formats */ + /* Raw files: capacity is physical size. For all other files: if + * the metadata has a capacity, use that, otherwise fall back to + * physical size. */ if (!(format = src->format)) { if (!cfg->allowDiskFormatProbing) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -11159,13 +11162,15 @@ qemuDomainGetBlockInfo(virDomainPtr dom, if (!(meta = virStorageFileGetMetadataFromBuf(src->path, buf, len, format, NULL))) goto endjob; - if (meta->capacity) - src->capacity = meta->capacity; - - /* Set default value .. */ - src->allocation = src->physical; + if (format == VIR_STORAGE_FILE_RAW) + src->capacity = src->physical; + else if ((meta = virStorageFileGetMetadataFromBuf(src->path, buf, + len, format, NULL))) + src->capacity = meta->capacity ? meta->capacity : src->physical; + else + goto endjob; - /* ..but if guest is not using raw disk format and on a block device, + /* If guest is not using raw disk format and on a block device, * then query highest allocated extent from QEMU */ if (virStorageSourceGetActualType(src) == VIR_STORAGE_TYPE_BLOCK && -- GitLab