/*
 * storage_backend_disk.c: storage backend for disk handling
 *
 * Copyright (C) 2007-2016 Red Hat, Inc.
 * Copyright (C) 2007-2008 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>
23
#include <unistd.h>
24

25
#include "dirname.h"
26
#include "virerror.h"
27
#include "virlog.h"
28
#include "storage_backend_disk.h"
29
#include "storage_util.h"
30
#include "viralloc.h"
31
#include "vircommand.h"
32
#include "virfile.h"
33
#include "configmake.h"
34
#include "virstring.h"
35

36 37
#define VIR_FROM_THIS VIR_FROM_STORAGE

38 39
VIR_LOG_INIT("storage.storage_backend_disk");

40 41
#define SECTOR_SIZE 512

42 43 44 45 46 47 48 49 50 51 52
/* Volume search predicate: matches a volume whose partition type is
 * "extended".  The @opaque argument is not used. */
static bool
virStorageVolPartFindExtended(virStorageVolDefPtr def,
                              const void *opaque ATTRIBUTE_UNUSED)
{
    return def->source.partType == VIR_STORAGE_VOL_DISK_TYPE_EXTENDED;
}


53
static int
54
virStorageBackendDiskMakeDataVol(virStoragePoolObjPtr pool,
55 56 57
                                 char **const groups,
                                 virStorageVolDefPtr vol)
{
58
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
59
    char *tmp, *devpath, *partname;
60
    bool addVol = false;
61 62 63 64 65 66 67 68

    /* Prepended path will be same for all partitions, so we can
     * strip the path to form a reasonable pool-unique name
     */
    if ((tmp = strrchr(groups[0], '/')))
        partname = tmp + 1;
    else
        partname = groups[0];
69 70

    if (vol == NULL) {
71 72 73
        /* This is typically a reload/restart/refresh path where
         * we're discovering the existing partitions for the pool
         */
74
        addVol = true;
75
        if (VIR_ALLOC(vol) < 0)
76
            return -1;
77 78
        if (VIR_STRDUP(vol->name, partname) < 0)
            goto error;
79 80 81
    }

    if (vol->target.path == NULL) {
82
        if (VIR_STRDUP(devpath, groups[0]) < 0)
83
            goto error;
84 85 86 87 88 89 90

        /* Now figure out the stable path
         *
         * XXX this method is O(N) because it scans the pool target
         * dir every time its run. Should figure out a more efficient
         * way of doing this...
         */
91
        vol->target.path = virStorageBackendStablePath(pool, devpath, true);
92
        VIR_FREE(devpath);
93
        if (vol->target.path == NULL)
94
            goto error;
95 96
    }

97 98 99 100 101 102 103 104
    /* Enforce provided vol->name is the same as what parted created.
     * We do this after filling target.path so that we have a chance at
     * deleting the partition with this failure from CreateVol path
     */
    if (STRNEQ(vol->name, partname)) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("invalid partition name '%s', expected '%s'"),
                       vol->name, partname);
105 106 107 108 109 110 111 112

        /* Let's see if by chance parthelper created a name that won't be
         * found later when we try to delete. We tell parthelper to add a 'p'
         * to the output via the part_separator flag, but if devmapper has
         * user_friendly_names set, the creation won't happen that way, thus
         * our deletion will fail because the name we generated is wrong.
         * Check for our conditions and see if the generated name is the
         * same as StablePath returns and has the 'p' in it */
113
        if (def->source.devices[0].part_separator == VIR_TRISTATE_BOOL_YES &&
114 115 116 117 118 119 120 121 122 123 124 125 126
            !virIsDevMapperDevice(vol->target.path) &&
            STREQ(groups[0], vol->target.path) &&
            (tmp = strrchr(groups[0], 'p'))) {

            /* If we remove the 'p' from groups[0] and the resulting
             * device is a devmapper device, then we know parthelper
             * was told to create the wrong name based on the results.
             * So just remove the 'p' from the vol->target.path too. */
            memmove(tmp, tmp + 1, strlen(tmp));
            if (virIsDevMapperDevice(groups[0]) &&
                (tmp = strrchr(vol->target.path, 'p')))
                memmove(tmp, tmp + 1, strlen(tmp));
        }
127
        goto error;
128 129
    }

130 131
    if (vol->key == NULL) {
        /* XXX base off a unique key of the underlying disk */
132
        if (VIR_STRDUP(vol->key, vol->target.path) < 0)
133
            goto error;
134 135 136
    }

    if (vol->source.extents == NULL) {
137
        if (VIR_ALLOC(vol->source.extents) < 0)
138
            goto error;
139 140 141 142
        vol->source.nextent = 1;

        if (virStrToLong_ull(groups[3], NULL, 10,
                             &vol->source.extents[0].start) < 0) {
143 144
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("cannot parse device start location"));
145
            goto error;
146 147 148 149
        }

        if (virStrToLong_ull(groups[4], NULL, 10,
                             &vol->source.extents[0].end) < 0) {
150 151
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("cannot parse device end location"));
152
            goto error;
153 154
        }

155
        if (VIR_STRDUP(vol->source.extents[0].path,
156
                       def->source.devices[0].path) < 0)
157
            goto error;
158 159
    }

160
    /* set partition type */
E
Eric Blake 已提交
161
    if (STREQ(groups[1], "normal"))
162
       vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_PRIMARY;
E
Eric Blake 已提交
163
    else if (STREQ(groups[1], "logical"))
164
       vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_LOGICAL;
E
Eric Blake 已提交
165
    else if (STREQ(groups[1], "extended"))
166
       vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_EXTENDED;
167
    else
168
       vol->source.partType = VIR_STORAGE_VOL_DISK_TYPE_NONE;
169

170 171
    vol->type = VIR_STORAGE_VOL_BLOCK;

172 173 174 175 176 177 178 179 180 181
    /* Refresh allocation/capacity/perms
     *
     * For an extended partition, virStorageBackendUpdateVolInfo will
     * return incorrect values for allocation and capacity, so use the
     * extent information captured above instead.
     *
     * Also once a logical partition exists or another primary partition
     * after an extended partition is created an open on the extended
     * partition will fail, so pass the NOERROR flag and only error if a
     * -1 was returned indicating some other error than an open error.
182 183 184 185 186 187 188 189
     *
     * NB: A small window exists in some cases where the just created
     * partition disappears, but then reappears. Since we were given
     * vol->target.path from parthelper, let's just be sure that any
     * kernel magic that occurs as a result of parthelper doesn't cause
     * us to fail with some sort of ENOENT failure since that would be
     * quite "unexpected". So rather than just fail, let's use the
     * virWaitForDevices to ensure everything has settled properly.
190
     */
191
    virWaitForDevices();
192
    if (vol->source.partType == VIR_STORAGE_VOL_DISK_TYPE_EXTENDED) {
193
        if (virStorageBackendUpdateVolInfo(vol, false,
194
                                           VIR_STORAGE_VOL_OPEN_DEFAULT |
195 196
                                           VIR_STORAGE_VOL_OPEN_NOERROR,
                                           0) == -1)
197
            goto error;
198 199
        vol->target.allocation = 0;
        vol->target.capacity =
200 201
            (vol->source.extents[0].end - vol->source.extents[0].start);
    } else {
202
        if (virStorageBackendUpdateVolInfo(vol, false,
203
                                           VIR_STORAGE_VOL_OPEN_DEFAULT, 0) < 0)
204
            goto error;
205
    }
206

207 208 209 210 211 212
    /* Now that we've updated @vol enough, let's add it to the pool
     * if it's not already there so that the subsequent pool search
     * pool def adjustments will work properly */
    if (addVol && virStoragePoolObjAddVol(pool, vol) < 0)
        goto error;

213 214
    /* Find the extended partition and increase the allocation value */
    if (vol->source.partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) {
215
        virStorageVolDefPtr voldef;
216

217 218 219 220 221
        voldef = virStoragePoolObjSearchVolume(pool,
                                               virStorageVolPartFindExtended,
                                               NULL);
        if (voldef)
            voldef->target.allocation += vol->target.allocation;
222 223
    }

224
    if (STRNEQ(groups[2], "metadata"))
225 226 227
        def->allocation += vol->target.allocation;
    if (vol->source.extents[0].end > def->capacity)
        def->capacity = vol->source.extents[0].end;
228 229

    return 0;
230 231 232 233 234

 error:
    if (addVol)
        virStorageVolDefFree(vol);
    return -1;
235 236 237
}

static int
238
virStorageBackendDiskMakeFreeExtent(virStoragePoolObjPtr pool,
239 240
                                    char **const groups)
{
241 242
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
    virStoragePoolSourceDevicePtr dev = &def->source.devices[0];
243

244 245
    if (VIR_REALLOC_N(dev->freeExtents,
                      dev->nfreeExtent + 1) < 0)
246 247 248
        return -1;

    memset(dev->freeExtents +
249 250
           dev->nfreeExtent, 0,
           sizeof(dev->freeExtents[0]));
251

252
    /* set type of free area */
E
Eric Blake 已提交
253
    if (STREQ(groups[1], "logical")) {
254 255 256 257 258 259
        dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_LOGICAL;
    } else {
        dev->freeExtents[dev->nfreeExtent].type = VIR_STORAGE_FREE_NORMAL;
    }


260 261 262 263 264 265 266 267
    if (virStrToLong_ull(groups[3], NULL, 10,
                         &dev->freeExtents[dev->nfreeExtent].start) < 0)
        return -1; /* Don't bother to re-alloc freeExtents - it'll be free'd shortly */

    if (virStrToLong_ull(groups[4], NULL, 10,
                         &dev->freeExtents[dev->nfreeExtent].end) < 0)
        return -1; /* Don't bother to re-alloc freeExtents - it'll be free'd shortly */

268
    /* first block reported as free, even if it is not */
269
    if (dev->freeExtents[dev->nfreeExtent].start == 0)
270 271
        dev->freeExtents[dev->nfreeExtent].start = SECTOR_SIZE;

272 273 274 275
    def->available += (dev->freeExtents[dev->nfreeExtent].end -
                       dev->freeExtents[dev->nfreeExtent].start);
    if (dev->freeExtents[dev->nfreeExtent].end > def->capacity)
        def->capacity = dev->freeExtents[dev->nfreeExtent].end;
276 277 278 279 280 281 282

    dev->nfreeExtent++;

    return 0;
}


283 284 285 286 287
/* Context passed as the opaque argument to virStorageBackendDiskMakeVol
 * while the partition helper output is being processed. */
struct virStorageBackendDiskPoolVolData {
    /* pool whose partitions are being read */
    virStoragePoolObjPtr pool;
    /* single volume to look for and update, or NULL to process every
     * partition record */
    virStorageVolDefPtr vol;
};

288
static int
289
virStorageBackendDiskMakeVol(size_t ntok ATTRIBUTE_UNUSED,
290
                             char **const groups,
291
                             void *opaque)
292
{
293 294
    struct virStorageBackendDiskPoolVolData *data = opaque;
    virStoragePoolObjPtr pool = data->pool;
295 296 297 298 299 300 301 302 303 304 305 306
    /*
     * Ignore normal+metadata, and logical+metadata partitions
     * since they're basically internal book-keeping regions
     * we have no control over. Do keep extended+metadata though
     * because that's the MS-DOS extended partition region we
     * need to be able to view/create/delete
     */
    if ((STREQ(groups[1], "normal") ||
         STREQ(groups[1], "logical")) &&
        STREQ(groups[2], "metadata"))
        return 0;

R
Richard W.M. Jones 已提交
307
    /* Remaining data / metadata parts get turn into volumes... */
308 309
    if (STREQ(groups[2], "metadata") ||
        STREQ(groups[2], "data")) {
310
        virStorageVolDefPtr vol = data->vol;
311 312 313 314 315 316 317 318 319 320 321 322

        if (vol) {
            /* We're searching for a specific vol only */
            if (vol->key) {
                if (STRNEQ(vol->key, groups[0]))
                    return 0;
            } else if (virStorageVolDefFindByKey(pool, groups[0]) != NULL) {
                /* If no key, the volume must be newly created. If groups[0]
                 * isn't already a volume, assume it's the path we want */
                return 0;
            }
        }
323

324
        return virStorageBackendDiskMakeDataVol(pool, groups, vol);
325 326
    } else if (STREQ(groups[2], "free")) {
        /* ....or free space extents */
327
        return virStorageBackendDiskMakeFreeExtent(pool, groups);
328
    } else {
R
Richard W.M. Jones 已提交
329
        /* This code path should never happen unless someone changed
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
         * libvirt_parthelper forgot to change this code */
        return -1;
    }
}

/* To get a list of partitions we run an external helper
 * tool which then uses parted APIs. This is because
 * parted's API is not compatible with libvirt's license
 * but we really really want to use parted because the
 * other options all suck :-)
 *
 * All the other storage backends run an external tool for
 * listing volumes so this really isn't too much of a pain,
 * and we can even ensure the output is friendly.
 */
static int
346
virStorageBackendDiskReadPartitions(virStoragePoolObjPtr pool,
347 348 349 350 351 352 353 354 355
                                    virStorageVolDefPtr vol)
{
    /*
     *  # libvirt_parthelper DEVICE
     * /dev/sda1      normal       data        32256    106928128    106896384
     * /dev/sda2      normal       data    106928640 100027629568  99920701440
     * -              normal   metadata 100027630080 100030242304      2612736
     *
     */
356

357
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
358 359
    char *parthelper_path;
    virCommandPtr cmd;
360 361 362 363
    struct virStorageBackendDiskPoolVolData cbdata = {
        .pool = pool,
        .vol = vol,
    };
364
    int ret;
365

366
    if (!(parthelper_path = virFileFindResource("libvirt_parthelper",
367
                                                abs_topbuilddir "/src",
368 369 370 371
                                                LIBEXECDIR)))
        return -1;

    cmd = virCommandNewArgList(parthelper_path,
372
                               def->source.devices[0].path,
373 374
                               NULL);

375
    /* Check for the presence of the part_separator='yes'. Pass this
376
     * along to the libvirt_parthelper as option '-p'. This will cause
377 378 379
     * libvirt_parthelper to append the "p" partition separator to
     * the generated device name for a source device which ends with
     * a non-numeric value (e.g. mpatha would generate mpathap#).
380
     */
381
    if (def->source.devices[0].part_separator == VIR_TRISTATE_BOOL_YES)
382 383
        virCommandAddArg(cmd, "-p");

384 385 386 387
    /* If a volume is passed, virStorageBackendDiskMakeVol only updates the
     * pool allocation for that single volume.
     */
    if (!vol)
388 389
        def->allocation = 0;
    def->capacity = def->available = 0;
390

391 392 393 394
    ret = virCommandRunNul(cmd,
                           6,
                           virStorageBackendDiskMakeVol,
                           &cbdata);
395
    virCommandFree(cmd);
396
    VIR_FREE(parthelper_path);
397
    return ret;
398 399
}

400
static int
401
virStorageBackendDiskMakePoolGeometry(size_t ntok ATTRIBUTE_UNUSED,
402
                                      char **const groups,
403
                                      void *data)
404
{
405
    virStoragePoolObjPtr pool = data;
406 407
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
    virStoragePoolSourceDevicePtr device = &(def->source.devices[0]);
P
Peter Krempa 已提交
408 409 410 411 412 413 414
    if (virStrToLong_i(groups[0], NULL, 0, &device->geometry.cylinders) < 0 ||
        virStrToLong_i(groups[1], NULL, 0, &device->geometry.heads) < 0 ||
        virStrToLong_i(groups[2], NULL, 0, &device->geometry.sectors) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Failed to create disk pool geometry"));
        return -1;
    }
415

P
Peter Krempa 已提交
416
    return 0;
417 418 419
}

static int
420
virStorageBackendDiskReadGeometry(virStoragePoolObjPtr pool)
421
{
422
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
423 424 425 426 427
    char *parthelper_path;
    virCommandPtr cmd;
    int ret;

    if (!(parthelper_path = virFileFindResource("libvirt_parthelper",
428
                                                abs_topbuilddir "/src",
429 430 431 432
                                                LIBEXECDIR)))
        return -1;

    cmd = virCommandNewArgList(parthelper_path,
433 434 435
                               def->source.devices[0].path,
                               "-g",
                               NULL);
436

437 438 439 440
    ret = virCommandRunNul(cmd,
                           3,
                           virStorageBackendDiskMakePoolGeometry,
                           pool);
441
    virCommandFree(cmd);
442
    VIR_FREE(parthelper_path);
443
    return ret;
444
}
445 446

static int
447
virStorageBackendDiskRefreshPool(virStoragePoolObjPtr pool)
448
{
449 450 451 452
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);

    VIR_FREE(def->source.devices[0].freeExtents);
    def->source.devices[0].nfreeExtent = 0;
453

J
John Ferlan 已提交
454
    virWaitForDevices();
455

456
    if (!virFileExists(def->source.devices[0].path)) {
457 458
        virReportError(VIR_ERR_INVALID_ARG,
                       _("device path '%s' doesn't exist"),
459
                       def->source.devices[0].path);
460 461 462
        return -1;
    }

463
    if (virStorageBackendDiskReadGeometry(pool) != 0)
464 465
        return -1;

466
    return virStorageBackendDiskReadPartitions(pool, NULL);
467 468 469
}


470
static int
471
virStorageBackendDiskStartPool(virStoragePoolObjPtr pool)
472
{
473
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
474
    const char *format;
475
    const char *path = def->source.devices[0].path;
476

J
John Ferlan 已提交
477
    virWaitForDevices();
478

479
    if (!virFileExists(path)) {
480
        virReportError(VIR_ERR_INVALID_ARG,
481
                       _("device path '%s' doesn't exist"), path);
482 483 484
        return -1;
    }

485 486 487
    if (def->source.format == VIR_STORAGE_POOL_DISK_UNKNOWN)
        def->source.format = VIR_STORAGE_POOL_DISK_DOS;
    format = virStoragePoolFormatDiskTypeToString(def->source.format);
488
    if (!virStorageBackendDeviceIsEmpty(path, format, false))
489 490 491 492 493 494
        return -1;

    return 0;
}


495 496 497 498
/**
 * Write a new partition table header
 */
static int
499
virStorageBackendDiskBuildPool(virStoragePoolObjPtr pool,
E
Eric Blake 已提交
500
                               unsigned int flags)
501
{
502 503
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
    int format = def->source.format;
504
    const char *fmt;
505 506
    bool ok_to_mklabel = false;
    int ret = -1;
507
    virCommandPtr cmd = NULL;
508

509 510
    virCheckFlags(VIR_STORAGE_POOL_BUILD_OVERWRITE |
                  VIR_STORAGE_POOL_BUILD_NO_OVERWRITE, ret);
E
Eric Blake 已提交
511

512 513 514
    VIR_EXCLUSIVE_FLAGS_GOTO(VIR_STORAGE_POOL_BUILD_OVERWRITE,
                             VIR_STORAGE_POOL_BUILD_NO_OVERWRITE,
                             error);
515

516 517
    fmt = virStoragePoolFormatDiskTypeToString(format);
    if (flags & VIR_STORAGE_POOL_BUILD_OVERWRITE) {
518
        ok_to_mklabel = true;
519
    } else {
520
        if (virStorageBackendDeviceIsEmpty(def->source.devices[0].path,
521
                                           fmt, true))
522 523
            ok_to_mklabel = true;
    }
524

525
    if (ok_to_mklabel) {
526
        if (virStorageBackendZeroPartitionTable(def->source.devices[0].path,
527 528 529
                                                1024 * 1024) < 0)
            goto error;

530
        /* eg parted /dev/sda mklabel --script msdos */
531
        if (format == VIR_STORAGE_POOL_DISK_UNKNOWN)
532
            format = def->source.format = VIR_STORAGE_POOL_DISK_DOS;
533
        if (format == VIR_STORAGE_POOL_DISK_DOS)
534 535 536 537
            fmt = "msdos";
        else
            fmt = virStoragePoolFormatDiskTypeToString(format);

538
        cmd = virCommandNewArgList(PARTED,
539
                                   def->source.devices[0].path,
540 541
                                   "mklabel",
                                   "--script",
542
                                   fmt,
543
                                   NULL);
544
        ret = virCommandRun(cmd, NULL);
545
    }
546

547
 error:
548
    virCommandFree(cmd);
549
    return ret;
550 551
}

552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570

/* Accumulator passed to virStorageVolNumOfPartTypes while iterating over
 * the volumes of a pool. */
struct virStorageVolNumData {
    /* number of primary and extended partitions seen so far */
    int count;
};

/* Volume iterator callback: bumps the counter in @opaque (a
 * struct virStorageVolNumData) for every primary or extended partition.
 * Always returns 0. */
static int
virStorageVolNumOfPartTypes(virStorageVolDefPtr def,
                            const void *opaque)
{
    struct virStorageVolNumData *counter = (struct virStorageVolNumData *)opaque;

    if (def->source.partType != VIR_STORAGE_VOL_DISK_TYPE_PRIMARY &&
        def->source.partType != VIR_STORAGE_VOL_DISK_TYPE_EXTENDED)
        return 0;

    counter->count++;
    return 0;
}


571 572 573 574
/**
 * Decides what kind of partition type that should be created.
 * Important when the partition table is of msdos type
 */
575
static int
576 577
virStorageBackendDiskPartTypeToCreate(virStoragePoolObjPtr pool)
{
578
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
579 580
    struct virStorageVolNumData data = { .count = 0 };

581
    if (def->source.format == VIR_STORAGE_POOL_DISK_DOS) {
582
        /* count primary and extended partitions,
583
           can't be more than 3 to create a new primary partition */
584 585 586 587
        if (virStoragePoolObjForEachVolume(pool, virStorageVolNumOfPartTypes,
                                           &data) == 0) {
            if (data.count >= 4)
                return VIR_STORAGE_VOL_DISK_TYPE_LOGICAL;
588 589 590 591 592 593 594 595
        }
    }

    /* for all other cases, all partitions are primary */
    return VIR_STORAGE_VOL_DISK_TYPE_PRIMARY;
}

static int
596
virStorageBackendDiskPartFormat(virStoragePoolObjPtr pool,
597
                                virStorageVolDefPtr vol,
E
Eric Blake 已提交
598
                                char** partFormat)
599
{
600 601 602
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);

    if (def->source.format == VIR_STORAGE_POOL_DISK_DOS) {
E
Eric Blake 已提交
603
        const char *partedFormat;
604
        partedFormat = virStoragePartedFsTypeToString(vol->target.format);
E
Eric Blake 已提交
605
        if (partedFormat == NULL) {
606 607
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("Invalid partition type"));
E
Eric Blake 已提交
608
            return -1;
609 610
        }
        if (vol->target.format == VIR_STORAGE_VOL_DISK_EXTENDED) {
Y
Yuri Chornoivan 已提交
611
            /* make sure we don't have an extended partition already */
612 613 614
            if (virStoragePoolObjSearchVolume(pool,
                                              virStorageVolPartFindExtended,
                                              NULL)) {
615 616
                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                   _("extended partition already exists"));
E
Eric Blake 已提交
617
                    return -1;
618
            }
619
            if (VIR_STRDUP(*partFormat, partedFormat) < 0)
E
Eric Blake 已提交
620
                return -1;
621 622 623 624 625 626
        } else {
            /* create primary partition as long as it is possible
               and after that check if an extended partition exists
               to create logical partitions. */
            /* XXX Only support one extended partition */
            switch (virStorageBackendDiskPartTypeToCreate(pool)) {
E
Eric Blake 已提交
627
            case VIR_STORAGE_VOL_DISK_TYPE_PRIMARY:
628
                if (virAsprintf(partFormat, "primary %s", partedFormat) < 0)
E
Eric Blake 已提交
629
                    return -1;
E
Eric Blake 已提交
630 631
                break;
            case VIR_STORAGE_VOL_DISK_TYPE_LOGICAL:
Y
Yuri Chornoivan 已提交
632
                /* make sure we have an extended partition */
633 634 635 636 637 638 639 640 641 642
                if (virStoragePoolObjSearchVolume(pool,
                                                  virStorageVolPartFindExtended,
                                                  NULL)) {
                    if (virAsprintf(partFormat, "logical %s",
                                    partedFormat) < 0)
                        return -1;
                } else {
                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                   _("no extended partition found and no "
                                     "primary partition available"));
E
Eric Blake 已提交
643 644 645 646
                    return -1;
                }
                break;
            default:
647 648
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               "%s", _("unknown partition type"));
E
Eric Blake 已提交
649
                return -1;
650 651 652
            }
        }
    } else {
653
        if (VIR_STRDUP(*partFormat, "primary") < 0)
E
Eric Blake 已提交
654
            return -1;
655 656 657 658 659
    }
    return 0;
}

/**
J
Ján Tomko 已提交
660
 * Aligns a new partition to nearest cylinder boundary
E
Eric Blake 已提交
661
 * when having a msdos partition table type
J
Ján Tomko 已提交
662
 * to avoid any problem with already existing
663 664 665
 * partitions
 */
static int
666 667 668 669
virStorageBackendDiskPartBoundaries(virStoragePoolObjPtr pool,
                                    unsigned long long *start,
                                    unsigned long long *end,
                                    unsigned long long allocation)
670
{
671
    size_t i;
672
    int smallestExtent = -1;
673 674 675
    unsigned long long smallestSize = 0;
    unsigned long long extraBytes = 0;
    unsigned long long alignedAllocation = allocation;
676 677
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
    virStoragePoolSourceDevicePtr dev = &def->source.devices[0];
678
    unsigned long long cylinderSize = (unsigned long long)dev->geometry.heads *
679 680
                                      dev->geometry.sectors * SECTOR_SIZE;

681
    VIR_DEBUG("find free area: allocation %llu, cyl size %llu", allocation,
E
Eric Blake 已提交
682
          cylinderSize);
683 684 685
    int partType = virStorageBackendDiskPartTypeToCreate(pool);

    /* how many extra bytes we have since we allocate
J
Ján Tomko 已提交
686
       aligned to the cylinder boundary */
687 688
    extraBytes = cylinderSize - (allocation % cylinderSize);

689
    for (i = 0; i < dev->nfreeExtent; i++) {
690 691 692 693 694
         unsigned long long size =
             dev->freeExtents[i].end -
             dev->freeExtents[i].start;
         unsigned long long neededSize = allocation;

695
         if (def->source.format == VIR_STORAGE_POOL_DISK_DOS) {
J
Ján Tomko 已提交
696
             /* align to cylinder boundary */
697 698 699 700 701 702
             neededSize += extraBytes;
             if ((*start % cylinderSize) > extraBytes) {
                 /* add an extra cylinder if the offset can't fit within
                    the extra bytes we have */
                 neededSize += cylinderSize;
             }
703
             /* if we are creating a logical partition, we need one extra
704
                block between partitions (or actually move start one block) */
705
             if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL)
706 707 708 709 710 711 712 713 714 715 716 717
                 size -= SECTOR_SIZE;
         }
         if (size > neededSize &&
             (smallestSize == 0 ||
             size < smallestSize)) {
             /* for logical partition, the free extent
                must be within a logical free area */
             if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL &&
                 dev->freeExtents[i].type != VIR_STORAGE_FREE_LOGICAL) {
                 continue;
                 /* for primary partition, the free extent
                    must not be within a logical free area */
E
Eric Blake 已提交
718 719 720
             } else if (partType == VIR_STORAGE_VOL_DISK_TYPE_PRIMARY &&
                        dev->freeExtents[i].type != VIR_STORAGE_FREE_NORMAL) {
                 continue;
721 722 723 724 725 726 727 728
             }
             smallestSize = size;
             smallestExtent = i;
             alignedAllocation = neededSize;
         }
    }

    if (smallestExtent == -1) {
729 730
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("no large enough free extent"));
731 732 733
        return -1;
    }

734
    VIR_DEBUG("aligned alloc %llu", alignedAllocation);
735 736 737 738 739 740 741 742
    *start = dev->freeExtents[smallestExtent].start;

    if (partType == VIR_STORAGE_VOL_DISK_TYPE_LOGICAL) {
        /* for logical partition, skip one block */
        *start += SECTOR_SIZE;
    }

    *end = *start + alignedAllocation;
743
    if (def->source.format == VIR_STORAGE_POOL_DISK_DOS) {
J
Ján Tomko 已提交
744
        /* adjust our allocation if start is not at a cylinder boundary */
745 746 747
        *end -= (*start % cylinderSize);
    }

748
    /* counting in bytes, we want the last byte of the current sector */
749
    *end -= 1;
750
    VIR_DEBUG("final aligned start %llu, end %llu", *start, *end);
751 752 753 754
    return 0;
}


755 756 757 758 759 760 761 762 763 764
/* virStorageBackendDiskDeleteVol
 * @pool: Pointer to the storage pool
 * @vol: Pointer to the volume definition
 * @flags: flags (unused for now)
 *
 * This API will remove the disk volume partition either from direct
 * API call or as an error path during creation when the partition
 * name provided during create doesn't match the name read from
 * virStorageBackendDiskReadPartitions.
 *
765
 * For a device mapper device, device representation is dependent upon
766 767 768 769 770 771 772 773 774 775 776 777 778
 * device mapper configuration, but the general rule of thumb is that at
 * creation if a device name ends with a number, then a partition separator
 * "p" is added to the created name; otherwise, if the device name doesn't
 * end with a number, then there is no partition separator. This name is
 * what ends up in the vol->target.path. This ends up being a link to a
 * /dev/mapper/dm-# device which cannot be used in the algorithm to determine
 * which partition to remove, but a properly handled target.path can be.
 *
 * For non device mapper devices, just need to resolve the link of the
 * vol->target.path in order to get the path.
 *
 * Returns 0 on success, -1 on failure with error message set.
 */
779
static int
780
virStorageBackendDiskDeleteVol(virStoragePoolObjPtr pool,
781 782 783 784 785
                               virStorageVolDefPtr vol,
                               unsigned int flags)
{
    char *part_num = NULL;
    char *devpath = NULL;
786
    char *dev_name;
787 788
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
    char *src_path = def->source.devices[0].path;
789
    char *srcname = last_component(src_path);
790 791 792 793 794 795
    virCommandPtr cmd = NULL;
    bool isDevMapperDevice;
    int rc = -1;

    virCheckFlags(0, -1);

796 797 798
    if (!vol->target.path) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("volume target path empty for source path '%s'"),
799
                      src_path);
800 801 802
        return -1;
    }

803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
    /* NB: This is the corollary to the algorithm in libvirt_parthelper
     *     (parthelper.c) that is used to generate the target.path name
     *     for use by libvirt. Changes to either, need to be reflected
     *     in both places */
    isDevMapperDevice = virIsDevMapperDevice(vol->target.path);
    if (isDevMapperDevice) {
        dev_name = last_component(vol->target.path);
    } else {
        if (virFileResolveLink(vol->target.path, &devpath) < 0) {
            virReportSystemError(errno,
                                 _("Couldn't read volume target path '%s'"),
                                 vol->target.path);
            goto cleanup;
        }
        dev_name = last_component(devpath);
818 819 820 821
    }

    VIR_DEBUG("dev_name=%s, srcname=%s", dev_name, srcname);

822
    if (!STRPREFIX(dev_name, srcname)) {
823 824 825 826 827 828
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Volume path '%s' did not start with parent "
                         "pool source device name."), dev_name);
        goto cleanup;
    }

829
    part_num = dev_name + strlen(srcname);
830

831 832 833 834
    /* For device mapper and we have a partition character 'p' as the
     * current character, let's move beyond that before checking part_num */
    if (isDevMapperDevice && *part_num == 'p')
        part_num++;
835

836 837 838 839 840
    if (*part_num == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("cannot parse partition number from target "
                         "'%s'"), dev_name);
        goto cleanup;
841 842
    }

843 844 845 846 847 848 849 850 851 852
    /* eg parted /dev/sda rm 2 or /dev/mapper/mpathc rm 2 */
    cmd = virCommandNewArgList(PARTED,
                               src_path,
                               "rm",
                               "--script",
                               part_num,
                               NULL);
    if (virCommandRun(cmd, NULL) < 0)
        goto cleanup;

853 854 855 856
    /* Refreshing the pool is the easiest option as LOGICAL and EXTENDED
     * partition allocation/capacity management is handled within
     * virStorageBackendDiskMakeDataVol and trying to redo that logic
     * here is pointless
857
     */
858
    virStoragePoolObjClearVols(pool);
859
    if (virStorageBackendDiskRefreshPool(pool) < 0)
860
        goto cleanup;
861

862 863 864 865 866 867 868 869
    rc = 0;
 cleanup:
    VIR_FREE(devpath);
    virCommandFree(cmd);
    return rc;
}


870
static int
871
virStorageBackendDiskCreateVol(virStoragePoolObjPtr pool,
872 873
                               virStorageVolDefPtr vol)
{
E
Eric Blake 已提交
874
    int res = -1;
875
    char *partFormat = NULL;
876
    unsigned long long startOffset = 0, endOffset = 0;
877
    virStoragePoolDefPtr def = virStoragePoolObjGetDef(pool);
878
    virErrorPtr save_err;
879
    virCommandPtr cmd = virCommandNewArgList(PARTED,
880
                                             def->source.devices[0].path,
881 882 883
                                             "mkpart",
                                             "--script",
                                             NULL);
884

885 886 887 888
    if (vol->target.encryption &&
        vol->target.encryption->format != VIR_STORAGE_ENCRYPTION_FORMAT_LUKS) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("storage pool only supports LUKS encrypted volumes"));
889
        goto cleanup;
890
    }
891

892
    if (virStorageBackendDiskPartFormat(pool, vol, &partFormat) != 0)
893
        goto cleanup;
894
    virCommandAddArg(cmd, partFormat);
895

896 897 898 899 900 901 902
    /* If we're going to encrypt using LUKS, then we could need up to
     * an extra 2MB for the LUKS header - so account for that now */
    if (vol->target.encryption)
        vol->target.capacity += 2 * 1024 * 1024;

    if (virStorageBackendDiskPartBoundaries(pool, &startOffset, &endOffset,
                                            vol->target.capacity) < 0)
E
Eric Blake 已提交
903
        goto cleanup;
904

905 906
    virCommandAddArgFormat(cmd, "%lluB", startOffset);
    virCommandAddArgFormat(cmd, "%lluB", endOffset);
907

908
    if (virCommandRun(cmd, NULL) < 0)
E
Eric Blake 已提交
909
        goto cleanup;
910

911
    /* wait for device node to show up */
J
John Ferlan 已提交
912
    virWaitForDevices();
913

914
    /* Blow away free extent info, as we're about to re-populate it */
915 916
    VIR_FREE(def->source.devices[0].freeExtents);
    def->source.devices[0].nfreeExtent = 0;
917

918 919 920 921
    /* Specifying a target path is meaningless */
    VIR_FREE(vol->target.path);

    /* Fetch actual extent info, generate key */
922 923 924 925 926 927 928 929 930
    if (virStorageBackendDiskReadPartitions(pool, vol) < 0)
        goto error;

    if (vol->target.encryption) {
        /* Adjust the sizes to account for the LUKS header */
        vol->target.capacity -= 2 * 1024 * 1024;
        vol->target.allocation -= 2 * 1024 * 1024;
        if (virStorageBackendCreateVolUsingQemuImg(pool, vol, NULL, 0) < 0)
            goto error;
931
    }
932

E
Eric Blake 已提交
933 934
    res = 0;

935
 cleanup:
E
Eric Blake 已提交
936
    VIR_FREE(partFormat);
937
    virCommandFree(cmd);
E
Eric Blake 已提交
938
    return res;
939 940 941 942 943 944 945 946 947 948

 error:
    /* Best effort to remove the partition. Ignore any errors
     * since we could be calling this with vol->target.path == NULL
     */
    save_err = virSaveLastError();
    ignore_value(virStorageBackendDiskDeleteVol(pool, vol, 0));
    virSetError(save_err);
    virFreeError(save_err);
    goto cleanup;
949 950
}

951

952
static int
953
virStorageBackendDiskBuildVolFrom(virStoragePoolObjPtr pool,
954 955 956 957 958 959
                                  virStorageVolDefPtr vol,
                                  virStorageVolDefPtr inputvol,
                                  unsigned int flags)
{
    virStorageBackendBuildVolFrom build_func;

960
    build_func = virStorageBackendGetBuildVolFromFunction(vol, inputvol);
961 962 963
    if (!build_func)
        return -1;

964
    return build_func(pool, vol, inputvol, flags);
965
}
966 967


968
static int
969
virStorageBackendDiskVolWipe(virStoragePoolObjPtr pool,
970 971 972 973 974
                             virStorageVolDefPtr vol,
                             unsigned int algorithm,
                             unsigned int flags)
{
    if (vol->source.partType != VIR_STORAGE_VOL_DISK_TYPE_EXTENDED)
975
        return virStorageBackendVolWipeLocal(pool, vol, algorithm, flags);
976 977 978 979 980 981 982 983 984

    /* Wiping an extended partition is not support */
    virReportError(VIR_ERR_NO_SUPPORT,
                   _("cannot wipe extended partition '%s'"),
                   vol->target.path);
    return -1;
}


985 986 987
virStorageBackend virStorageBackendDisk = {
    .type = VIR_STORAGE_POOL_DISK,

988
    .startPool = virStorageBackendDiskStartPool,
989 990 991 992 993
    .buildPool = virStorageBackendDiskBuildPool,
    .refreshPool = virStorageBackendDiskRefreshPool,

    .createVol = virStorageBackendDiskCreateVol,
    .deleteVol = virStorageBackendDiskDeleteVol,
994
    .buildVolFrom = virStorageBackendDiskBuildVolFrom,
995 996
    .uploadVol = virStorageBackendVolUploadLocal,
    .downloadVol = virStorageBackendVolDownloadLocal,
997
    .wipeVol = virStorageBackendDiskVolWipe,
998
};
999 1000 1001 1002 1003 1004 1005


int
virStorageBackendDiskRegister(void)
{
    return virStorageBackendRegister(&virStorageBackendDisk);
}