/* * storage_backend_disk.c: storage backend for disk handling * * Copyright (C) 2007-2014 Red Hat, Inc. * Copyright (C) 2007-2008 Daniel P. Berrange * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see * . * * Author: Daniel P. Berrange */ #include #include #include #include #include "dirname.h" #include "virerror.h" #include "virlog.h" #include "storage_backend_disk.h" #include "viralloc.h" #include "vircommand.h" #include "virfile.h" #include "configmake.h" #include "virstring.h" #define VIR_FROM_THIS VIR_FROM_STORAGE VIR_LOG_INIT("storage.storage_backend_disk"); #define SECTOR_SIZE 512 static int virStorageBackendDiskMakeDataVol(virStoragePoolObjPtr pool, char **const groups, virStorageVolDefPtr vol) { char *tmp, *devpath; if (vol == NULL) { if (VIR_ALLOC(vol) < 0) return -1; /* Prepended path will be same for all partitions, so we can * strip the path to form a reasonable pool-unique name */ tmp = strrchr(groups[0], '/'); if (VIR_STRDUP(vol->name, tmp ? tmp + 1 : groups[0]) < 0 || VIR_APPEND_ELEMENT_COPY(pool->volumes.objs, pool->volumes.count, vol) < 0) { virStorageVolDefFree(vol); return -1; } } if (vol->target.path == NULL) { if (VIR_STRDUP(devpath, groups[0]) < 0) return -1; /* Now figure out the stable path * * XXX this method is O(N) because it scans the pool target * dir every time its run. Should figure out a more efficient * way of doing this... */ vol->target.path = virStorageBackendStablePath(pool, devpath, true); VIR_FREE(devpath); if (vol->target.path == NULL) return -1; } if (vol->key == NULL) { /* XXX base off a unique key of the underlying disk */ if (VIR_STRDUP(vol->key, vol->target.path) < 0) return -1; } if (vol->source.extents == NULL) { if (VIR_ALLOC(vol->source.extents) < 0) return -1; vol->source.nextent = 1; if (virStrToLong_ull(groups[3], NULL, 10, &vol->source.extents[0].start) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot parse device start location")); return -1; } if (virStrToLong_ull(groups[4], NULL, 10, &vol->source.extents[0].end) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot parse device end location")); return -1; } if (VIR_STRDUP(vol->source.extents[0].path, pool->def->source.devices[0].path) < 0) return -1; } /* Refresh allocation/capacity/perms */ if (virStorageBackendUpdateVolInfo(vol, true, false, VIR_STORAGE_VOL_OPEN_DEFAULT) < 0) return -1; /* set partition type */ if (STREQ(groups[1], "normal")) vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_PRIMARY; else if (STREQ(groups[1], "logical")) vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_LOGICAL; else if (STREQ(groups[1], "extended")) vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_EXTENDED; else vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_NONE; vol->type = VIR_STORAGE_VOL_BLOCK; /* The above gets allocation wrong for * extended partitions, so overwrite it */ vol->target.allocation = vol->target.capacity = (vol->source.extents[0].end - vol->source.extents[0].start); if (STRNEQ(groups[2], "metadata")) pool->def->allocation += vol->target.allocation; if (vol->source.extents[0].end > pool->def->capacity) pool->def->capacity = vol->source.extents[0].end; return 0; } static int virStorageBackendDiskMakeFreeExtent(virStoragePoolObjPtr pool, char **const groups) { virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0]; if (VIR_REALLOC_N(dev->freeExtents, dev->nfreeExtent + 1) < 0) return -1; memset(dev->freeExtents + dev->nfreeExtent, 0, sizeof(dev->freeExtents[0])); /* set type of free area */ if (STREQ(groups[1], "logical")) { dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_LOGICAL; } else { dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_NORMAL; } if (virStrToLong_ull(groups[3], NULL, 10, &dev->freeExtents[dev->nfreeExtent].start) < 0) return -1; /* Don't bother to re-alloc freeExtents - it'll be free'd shortly */ if (virStrToLong_ull(groups[4], NULL, 10, &dev->freeExtents[dev->nfreeExtent].end) < 0) return -1; /* Don't bother to re-alloc freeExtents - it'll be free'd shortly */ /* first block reported as free, even if it is not */ if (dev->freeExtents[dev->nfreeExtent].start == 0) { dev->freeExtents[dev->nfreeExtent].start = SECTOR_SIZE; } pool->def->available += (dev->freeExtents[dev->nfreeExtent].end - dev->freeExtents[dev->nfreeExtent].start); if (dev->freeExtents[dev->nfreeExtent].end > pool->def->capacity) pool->def->capacity = dev->freeExtents[dev->nfreeExtent].end; dev->nfreeExtent++; return 0; } struct virStorageBackendDiskPoolVolData { virStoragePoolObjPtr pool; virStorageVolDefPtr vol; }; static int virStorageBackendDiskMakeVol(size_t ntok ATTRIBUTE_UNUSED, char **const groups, void *opaque) { struct virStorageBackendDiskPoolVolData *data = opaque; virStoragePoolObjPtr pool = data->pool; /* * Ignore normal+metadata, and logical+metadata partitions * since they're basically internal book-keeping regions * we have no control over. Do keep extended+metadata though * because that's the MS-DOS extended partition region we * need to be able to view/create/delete */ if ((STREQ(groups[1], "normal") || STREQ(groups[1], "logical")) && STREQ(groups[2], "metadata")) return 0; /* Remaining data / metadata parts get turn into volumes... */ if (STREQ(groups[2], "metadata") || STREQ(groups[2], "data")) { virStorageVolDefPtr vol = data->vol; if (vol) { /* We're searching for a specific vol only */ if (vol->key) { if (STRNEQ(vol->key, groups[0])) return 0; } else if (virStorageVolDefFindByKey(pool, groups[0]) != NULL) { /* If no key, the volume must be newly created. If groups[0] * isn't already a volume, assume it's the path we want */ return 0; } } return virStorageBackendDiskMakeDataVol(pool, groups, vol); } else if (STREQ(groups[2], "free")) { /* ....or free space extents */ return virStorageBackendDiskMakeFreeExtent(pool, groups); } else { /* This code path should never happen unless someone changed * libvirt_parthelper forgot to change this code */ return -1; } } /* To get a list of partitions we run an external helper * tool which then uses parted APIs. This is because * parted's API is not compatible with libvirt's license * but we really really want to use parted because the * other options all suck :-) * * All the other storage backends run an external tool for * listing volumes so this really isn't too much of a pain, * and we can even ensure the output is friendly. */ static int virStorageBackendDiskReadPartitions(virStoragePoolObjPtr pool, virStorageVolDefPtr vol) { /* * # libvirt_parthelper DEVICE * /dev/sda1 normal data 32256 106928128 106896384 * /dev/sda2 normal data 106928640 100027629568 99920701440 * - normal metadata 100027630080 100030242304 2612736 * */ char *parthelper_path; virCommandPtr cmd; struct virStorageBackendDiskPoolVolData cbdata = { .pool = pool, .vol = vol, }; int ret; if (!(parthelper_path = virFileFindResource("libvirt_parthelper", "src", LIBEXECDIR))) return -1; cmd = virCommandNewArgList(parthelper_path, pool->def->source.devices[0].path, NULL); pool->def->allocation = pool->def->capacity = pool->def->available = 0; ret = virCommandRunNul(cmd, 6, virStorageBackendDiskMakeVol, &cbdata); virCommandFree(cmd); VIR_FREE(parthelper_path); return ret; } static int virStorageBackendDiskMakePoolGeometry(size_t ntok ATTRIBUTE_UNUSED, char **const groups, void *data) { virStoragePoolObjPtr pool = data; virStoragePoolSourceDevicePtr device = &(pool->def->source.devices[0]); if (virStrToLong_i(groups[0], NULL, 0, &device->geometry.cylinders) < 0 || virStrToLong_i(groups[1], NULL, 0, &device->geometry.heads) < 0 || virStrToLong_i(groups[2], NULL, 0, &device->geometry.sectors) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Failed to create disk pool geometry")); return -1; } return 0; } static int virStorageBackendDiskReadGeometry(virStoragePoolObjPtr pool) { char *parthelper_path; virCommandPtr cmd; int ret; if (!(parthelper_path = virFileFindResource("libvirt_parthelper", "src", LIBEXECDIR))) return -1; cmd = virCommandNewArgList(parthelper_path, pool->def->source.devices[0].path, "-g", NULL); ret = virCommandRunNul(cmd, 3, virStorageBackendDiskMakePoolGeometry, pool); virCommandFree(cmd); VIR_FREE(parthelper_path); return ret; } static int virStorageBackendDiskRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool) { VIR_FREE(pool->def->source.devices[0].freeExtents); pool->def->source.devices[0].nfreeExtent = 0; virFileWaitForDevices(); if (!virFileExists(pool->def->source.devices[0].path)) { virReportError(VIR_ERR_INVALID_ARG, _("device path '%s' doesn't exist"), pool->def->source.devices[0].path); return -1; } if (virStorageBackendDiskReadGeometry(pool) != 0) { return -1; } return virStorageBackendDiskReadPartitions(pool, NULL); } /** * Check for a valid disk label (partition table) on device * * return: 0 - valid disk label found * >0 - no or unrecognized disk label * <0 - error finding the disk label */ static int virStorageBackendDiskFindLabel(const char* device) { const char *const args[] = { device, "print", "--script", NULL, }; virCommandPtr cmd = virCommandNew(PARTED); char *output = NULL; int ret = -1; virCommandAddArgSet(cmd, args); virCommandAddEnvString(cmd, "LC_ALL=C"); virCommandSetOutputBuffer(cmd, &output); /* if parted succeeds we have a valid partition table */ ret = virCommandRun(cmd, NULL); if (ret < 0) { if (strstr(output, "unrecognised disk label")) ret = 1; } virCommandFree(cmd); VIR_FREE(output); return ret; } /** * Write a new partition table header */ static int virStorageBackendDiskBuildPool(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, unsigned int flags) { bool ok_to_mklabel = false; int ret = -1; virCommandPtr cmd = NULL; virCheckFlags(VIR_STORAGE_POOL_BUILD_OVERWRITE | VIR_STORAGE_POOL_BUILD_NO_OVERWRITE, ret); if (flags == (VIR_STORAGE_POOL_BUILD_OVERWRITE | VIR_STORAGE_POOL_BUILD_NO_OVERWRITE)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Overwrite and no overwrite flags" " are mutually exclusive")); goto error; } if (flags & VIR_STORAGE_POOL_BUILD_OVERWRITE) ok_to_mklabel = true; else { int check; check = virStorageBackendDiskFindLabel( pool->def->source.devices[0].path); if (check > 0) { ok_to_mklabel = true; } else if (check < 0) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("Error checking for disk label")); } else { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Disk label already present")); } } if (ok_to_mklabel) { /* eg parted /dev/sda mklabel msdos */ cmd = virCommandNewArgList(PARTED, pool->def->source.devices[0].path, "mklabel", "--script", ((pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) ? "msdos" : virStoragePoolFormatDiskTypeToString(pool->def->source.format)), NULL); ret = virCommandRun(cmd, NULL); } error: virCommandFree(cmd); return ret; } /** * Decides what kind of partition type that should be created. * Important when the partition table is of msdos type */ static int virStorageBackendDiskPartTypeToCreate(virStoragePoolObjPtr pool) { if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { /* count primary and extended partitions, can't be more than 3 to create a new primary partition */ size_t i; int count = 0; for (i = 0; i < pool->volumes.count; i++) { int partType = pool->volumes.objs[i]->source.partType; if (partType == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY || partType == VIR_STORAGE_VOL_DISK_TYPE_EXTENDED) count++; } if (count >= 4) { return VIR_STORAGE_VOL_DISK_TYPE_LOGICAL; } } /* for all other cases, all partitions are primary */ return VIR_STORAGE_VOL_DISK_TYPE_PRIMARY; } static int virStorageBackendDiskPartFormat(virStoragePoolObjPtr pool, virStorageVolDefPtr vol, char** partFormat) { size_t i; if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { const char *partedFormat; partedFormat = virStoragePartedFsTypeToString(vol->target.format); if (partedFormat == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Invalid partition type")); return -1; } if (vol->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) { /* make sure we don't have a extended partition already */ for (i = 0; i < pool->volumes.count; i++) { if (pool->volumes.objs[i]->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("extended partition already exists")); return -1; } } if (VIR_STRDUP(*partFormat, partedFormat) < 0) return -1; } else { /* create primary partition as long as it is possible and after that check if an extended partition exists to create logical partitions. */ /* XXX Only support one extended partition */ switch (virStorageBackendDiskPartTypeToCreate(pool)) { case VIR_STORAGE_VOL_DISK_TYPE_PRIMARY: if (virAsprintf(partFormat, "primary %s", partedFormat) < 0) return -1; break; case VIR_STORAGE_VOL_DISK_TYPE_LOGICAL: /* make sure we have a extended partition */ for (i = 0; i < pool->volumes.count; i++) { if (pool->volumes.objs[i]->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) { if (virAsprintf(partFormat, "logical %s", partedFormat) < 0) return -1; break; } } if (i == pool->volumes.count) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no extended partition found and no primary partition available")); return -1; } break; default: virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unknown partition type")); return -1; } } } else { if (VIR_STRDUP(*partFormat, "primary") < 0) return -1; } return 0; } /** * Aligns a new partition to nearest cylinder boundary * when having a msdos partition table type * to avoid any problem with already existing * partitions */ static int virStorageBackendDiskPartBoundaries(virStoragePoolObjPtr pool, unsigned long long *start, unsigned long long *end, unsigned long long allocation) { size_t i; int smallestExtent = -1; unsigned long long smallestSize = 0; unsigned long long extraBytes = 0; unsigned long long alignedAllocation = allocation; virStoragePoolSourceDevicePtr dev = &pool->def->source.devices[0]; unsigned long long cylinderSize = dev->geometry.heads * dev->geometry.sectors * SECTOR_SIZE; VIR_DEBUG("find free area: allocation %llu, cyl size %llu", allocation, cylinderSize); int partType = virStorageBackendDiskPartTypeToCreate(pool); /* how many extra bytes we have since we allocate aligned to the cylinder boundary */ extraBytes = cylinderSize - (allocation % cylinderSize); for (i = 0; i < dev->nfreeExtent; i++) { unsigned long long size = dev->freeExtents[i].end - dev->freeExtents[i].start; unsigned long long neededSize = allocation; if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { /* align to cylinder boundary */ neededSize += extraBytes; if ((*start % cylinderSize) > extraBytes) { /* add an extra cylinder if the offset can't fit within the extra bytes we have */ neededSize += cylinderSize; } /* if we are creating a logical partition, we need one extra block between partitions (or actually move start one block) */ if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) { size -= SECTOR_SIZE; } } if (size > neededSize && (smallestSize == 0 || size < smallestSize)) { /* for logical partition, the free extent must be within a logical free area */ if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL && dev->freeExtents[i].type != VIR_STORAGE_FREE_LOGICAL) { continue; /* for primary partition, the free extent must not be within a logical free area */ } else if (partType == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY && dev->freeExtents[i].type != VIR_STORAGE_FREE_NORMAL) { continue; } smallestSize = size; smallestExtent = i; alignedAllocation = neededSize; } } if (smallestExtent == -1) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no large enough free extent")); return -1; } VIR_DEBUG("aligned alloc %llu", alignedAllocation); *start = dev->freeExtents[smallestExtent].start; if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) { /* for logical partition, skip one block */ *start += SECTOR_SIZE; } *end = *start + alignedAllocation; if (pool->def->source.format == VIR_STORAGE_POOL_DISK_DOS) { /* adjust our allocation if start is not at a cylinder boundary */ *end -= (*start % cylinderSize); } /* counting in bytes, we want the last byte of the current sector */ *end -= 1; VIR_DEBUG("final aligned start %llu, end %llu", *start, *end); return 0; } static int virStorageBackendDiskCreateVol(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, virStorageVolDefPtr vol) { int res = -1; char *partFormat = NULL; unsigned long long startOffset = 0, endOffset = 0; virCommandPtr cmd = virCommandNewArgList(PARTED, pool->def->source.devices[0].path, "mkpart", "--script", NULL); if (vol->target.encryption != NULL) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("storage pool does not support encrypted " "volumes")); goto cleanup; } if (virStorageBackendDiskPartFormat(pool, vol, &partFormat) != 0) { goto cleanup; } virCommandAddArg(cmd, partFormat); if (virStorageBackendDiskPartBoundaries(pool, &startOffset, &endOffset, vol->target.capacity) != 0) { goto cleanup; } virCommandAddArgFormat(cmd, "%lluB", startOffset); virCommandAddArgFormat(cmd, "%lluB", endOffset); if (virCommandRun(cmd, NULL) < 0) goto cleanup; /* wait for device node to show up */ virFileWaitForDevices(); /* Blow away free extent info, as we're about to re-populate it */ VIR_FREE(pool->def->source.devices[0].freeExtents); pool->def->source.devices[0].nfreeExtent = 0; /* Specifying a target path is meaningless */ VIR_FREE(vol->target.path); /* Fetch actual extent info, generate key */ if (virStorageBackendDiskReadPartitions(pool, vol) < 0) goto cleanup; res = 0; cleanup: VIR_FREE(partFormat); virCommandFree(cmd); return res; } static int virStorageBackendDiskBuildVolFrom(virConnectPtr conn, virStoragePoolObjPtr pool, virStorageVolDefPtr vol, virStorageVolDefPtr inputvol, unsigned int flags) { virStorageBackendBuildVolFrom build_func; build_func = virStorageBackendGetBuildVolFromFunction(vol, inputvol); if (!build_func) return -1; return build_func(conn, pool, vol, inputvol, flags); } static int virStorageBackendDiskDeleteVol(virConnectPtr conn ATTRIBUTE_UNUSED, virStoragePoolObjPtr pool, virStorageVolDefPtr vol, unsigned int flags) { char *part_num = NULL; char *devpath = NULL; char *dev_name, *srcname; virCommandPtr cmd = NULL; bool isDevMapperDevice; int rc = -1; virCheckFlags(0, -1); if (virFileResolveLink(vol->target.path, &devpath) < 0) { virReportSystemError(errno, _("Couldn't read volume target path '%s'"), vol->target.path); goto cleanup; } dev_name = last_component(devpath); srcname = last_component(pool->def->source.devices[0].path); VIR_DEBUG("dev_name=%s, srcname=%s", dev_name, srcname); isDevMapperDevice = virIsDevMapperDevice(devpath); if (!isDevMapperDevice && !STRPREFIX(dev_name, srcname)) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Volume path '%s' did not start with parent " "pool source device name."), dev_name); goto cleanup; } if (!isDevMapperDevice) { part_num = dev_name + strlen(srcname); if (*part_num == 0) { virReportError(VIR_ERR_INTERNAL_ERROR, _("cannot parse partition number from target " "'%s'"), dev_name); goto cleanup; } /* eg parted /dev/sda rm 2 */ cmd = virCommandNewArgList(PARTED, pool->def->source.devices[0].path, "rm", "--script", part_num, NULL); if (virCommandRun(cmd, NULL) < 0) goto cleanup; } else { cmd = virCommandNewArgList(DMSETUP, "remove", "--force", devpath, NULL); if (virCommandRun(cmd, NULL) < 0) goto cleanup; } rc = 0; cleanup: VIR_FREE(devpath); virCommandFree(cmd); return rc; } virStorageBackend virStorageBackendDisk = { .type = VIR_STORAGE_POOL_DISK, .buildPool = virStorageBackendDiskBuildPool, .refreshPool = virStorageBackendDiskRefreshPool, .createVol = virStorageBackendDiskCreateVol, .deleteVol = virStorageBackendDiskDeleteVol, .buildVolFrom = virStorageBackendDiskBuildVolFrom, .uploadVol = virStorageBackendVolUploadLocal, .downloadVol = virStorageBackendVolDownloadLocal, };