/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
P
pbrook 已提交
25
#include "qemu-common.h"
26
#include "trace.h"
27 28
#include "block/block_int.h"
#include "block/blockjob.h"
29
#include "qemu/error-report.h"
30
#include "qemu/module.h"
31
#include "qapi/qmp/qerror.h"
32
#include "qapi/qmp/qjson.h"
33
#include "sysemu/block-backend.h"
34
#include "sysemu/sysemu.h"
35
#include "qemu/notify.h"
36
#include "qemu/coroutine.h"
37
#include "block/qapi.h"
L
Luiz Capitulino 已提交
38
#include "qmp-commands.h"
39
#include "qemu/timer.h"
40
#include "qapi-event.h"
41
#include "block/throttle-groups.h"
B
bellard 已提交
42

J
Juan Quintela 已提交
43
#ifdef CONFIG_BSD
B
bellard 已提交
44 45 46
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
B
Blue Swirl 已提交
47
#include <sys/queue.h>
48
#ifndef __DragonFly__
B
bellard 已提交
49 50
#include <sys/disk.h>
#endif
51
#endif
B
bellard 已提交
52

53 54 55 56
#ifdef _WIN32
#include <windows.h>
#endif

J
John Snow 已提交
57 58 59 60 61 62 63 64
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
F
Fam Zheng 已提交
65
struct BdrvDirtyBitmap {
66 67 68 69 70
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
F
Fam Zheng 已提交
71 72 73
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

74 75
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

M
Max Reitz 已提交
76
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
77

78 79 80
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

81 82
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
B
bellard 已提交
83

84 85 86
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
87
                             const BdrvChildRole *child_role, Error **errp);
88

89
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
90 91 92
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
#ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Also covers the empty string: filename[1] is never read then */
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if @filename is exactly a drive specification ("x:") or starts
 * with one of the prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
#endif

113 114 115
/*
 * Return bs->bl.opt_mem_alignment when @bs is non-NULL and has a driver
 * attached; otherwise fall back to the larger of 4096 and the page size.
 */
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        return bs->bl.opt_mem_alignment;
    }

    /* page size or 4k (hdd sector size) should be on the safe side */
    return MAX(4096, getpagesize());
}

123 124 125
/*
 * Return bs->bl.min_mem_alignment when @bs is non-NULL and has a driver
 * attached; otherwise fall back to the larger of 4096 and the page size.
 */
size_t bdrv_min_mem_align(BlockDriverState *bs)
{
    if (bs && bs->drv) {
        return bs->bl.min_mem_alignment;
    }

    /* page size or 4k (hdd sector size) should be on the safe side */
    return MAX(4096, getpagesize());
}

133
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * separator character found in it is a colon rather than a directory
 * separator.  On Windows, drive specifications never count as a protocol.
 * Returns 1 if so, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";
#else
    const char *separators = ":/";
#endif
    size_t prefix_len;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    /* Index of the first separator, or of the terminating NUL if none */
    prefix_len = strcspn(path, separators);
    return path[prefix_len] == ':';
}

B
bellard 已提交
151
/*
 * Return 1 if @path is absolute, 0 otherwise.  Everywhere, a leading '/'
 * makes a path absolute; on Windows a leading '\' or any drive
 * specification does so as well.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return path[0] == '/' || path[0] == '\\';
#else
    return path[0] == '/';
#endif
}

B
bellard 已提交
164 165 166 167 168 169
/*
 * Build in @dest (at most @dest_size bytes including the terminator) the
 * path designated by @filename.
 *
 * An absolute @filename is copied as is.  Otherwise it is taken relative to
 * @base_path: everything in @base_path up to and including its last
 * directory separator, or up to and including its first ':' if that reaches
 * further, is kept and @filename is appended.  URLs are supported.
 *
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Just past the first ':' of base_path, or its start if there is none */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Just past the last directory separator, or the start if none */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever of the two prefixes is longer */
    prefix_end = after_sep > after_colon ? after_sep : after_colon;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}

208 209
/*
 * Compute the full name of the backing file @backing of image @backed and
 * store it in @dest (buffer of @sz bytes).
 *
 * @backing is copied unchanged when it is empty, has a protocol prefix or
 * is an absolute path.  Otherwise it is combined with @backed through
 * path_combine(), unless @backed is empty or a "json:" pseudo-filename, in
 * which case an error is set in @errp and @dest is left untouched.
 */
void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
                                                  Error **errp)
{
    bool usable_as_is = backing[0] == '\0' || path_has_protocol(backing) ||
                        path_is_absolute(backing);

    if (usable_as_is) {
        pstrcpy(dest, sz, backing);
        return;
    }

    if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
        error_setg(errp, "Cannot use relative backing file names for '%s'",
                   backed);
        return;
    }

    path_combine(dest, sz, backed, backing);
}

225 226
/*
 * Store the full backing file name of @bs in @dest (buffer of @sz bytes).
 * bs->exact_filename is used as the name of the backed image when it is
 * non-empty, bs->filename otherwise.  Errors are reported through @errp.
 */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
                                    Error **errp)
{
    char *backed;

    if (bs->exact_filename[0]) {
        backed = bs->exact_filename;
    } else {
        backed = bs->filename;
    }

    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
                                                 dest, sz, errp);
}

234 235 236
/*
 * Register block driver @bdrv: pass it through bdrv_setup_io_funcs() and
 * put it at the head of the global bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
B
bellard 已提交
240

241
BlockDriverState *bdrv_new_root(void)
B
bellard 已提交
242
{
243
    BlockDriverState *bs = bdrv_new();
244 245 246 247 248 249 250 251 252 253

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

254
    bs = g_new0(BlockDriverState, 1);
F
Fam Zheng 已提交
255
    QLIST_INIT(&bs->dirty_bitmaps);
256 257 258
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
P
Paolo Bonzini 已提交
259
    notifier_list_init(&bs->close_notifiers);
260
    notifier_with_return_list_init(&bs->before_write_notifiers);
261 262
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
263
    bs->refcnt = 1;
264
    bs->aio_context = qemu_get_aio_context();
P
Paolo Bonzini 已提交
265

B
bellard 已提交
266 267 268
    return bs;
}

P
Paolo Bonzini 已提交
269 270 271 272 273
/* Add @notify to the close_notifiers list of @bs. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

B
bellard 已提交
274 275 276
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
277 278
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
B
bellard 已提交
279
            return drv1;
280
        }
B
bellard 已提交
281 282 283 284
    }
    return NULL;
}

285
/* Return 1 if @name occurs in the NULL-terminated array @whitelist, else 0. */
static int bdrv_whitelist_contains(const char **whitelist, const char *name)
{
    const char **entry;

    for (entry = whitelist; *entry; entry++) {
        if (strcmp(name, *entry) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Check @drv against the build-time driver whitelists.
 *
 * Returns 1 if both whitelists are empty, if the driver's format name is on
 * the read-write whitelist, or if @read_only is true and the name is on the
 * read-only whitelist.  Returns 0 otherwise.
 */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    if (bdrv_whitelist_contains(whitelist_rw, drv->format_name)) {
        return 1;
    }

    return read_only &&
           bdrv_whitelist_contains(whitelist_ro, drv->format_name);
}

314 315 316
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
317
    QemuOpts *opts;
318
    int ret;
319
    Error *err;
320 321 322 323
} CreateCo;

/*
 * Entry point used by bdrv_create().  @opaque is a CreateCo: its driver's
 * bdrv_create callback is invoked, and the return value and any error it
 * reported are stored back in the CreateCo.
 */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    Error *create_err = NULL;
    int result;

    assert(cco->drv);

    result = cco->drv->bdrv_create(cco->filename, cco->opts, &create_err);
    if (create_err) {
        error_propagate(&cco->err, create_err);
    }
    /* Written last: bdrv_create() polls until this leaves NOT_DONE */
    cco->ret = result;
}

337
int bdrv_create(BlockDriver *drv, const char* filename,
338
                QemuOpts *opts, Error **errp)
B
bellard 已提交
339
{
340 341 342 343 344 345
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
346
        .opts = opts,
347
        .ret = NOT_DONE,
348
        .err = NULL,
349 350
    };

C
Chunyan Liu 已提交
351
    if (!drv->bdrv_create) {
352
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
353 354
        ret = -ENOTSUP;
        goto out;
355 356 357 358 359 360 361 362 363
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
364
            aio_poll(qemu_get_aio_context(), true);
365 366 367 368
        }
    }

    ret = cco.ret;
369
    if (ret < 0) {
370
        if (cco.err) {
371 372 373 374 375
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }
376

377 378
out:
    g_free(cco.filename);
379
    return ret;
B
bellard 已提交
380 381
}

C
Chunyan Liu 已提交
382
/*
 * Create the file @filename using the protocol driver that
 * bdrv_find_protocol() selects for it (protocol prefixes allowed).
 *
 * Returns -ENOENT if no protocol driver is found, otherwise the return
 * value of bdrv_create().  Errors are reported through @errp.
 */
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    Error *create_err = NULL;
    BlockDriver *proto_drv;
    int ret;

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return -ENOENT;
    }

    ret = bdrv_create(proto_drv, filename, opts, &create_err);
    if (create_err) {
        error_propagate(errp, create_err);
    }

    return ret;
}

400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
/**
 * Query the logical and physical block sizes of @bs.
 *
 * The request is forwarded to the driver's bdrv_probe_blocksizes callback,
 * which fills in @bsz; its return value (0 on success, -errno on failure)
 * is returned unchanged.  -ENOTSUP is returned when @bs has no driver or
 * the driver lacks the callback.
 * @bs must not be empty.
 */
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_probe_blocksizes) {
        return -ENOTSUP;
    }

    return drv->bdrv_probe_blocksizes(bs, bsz);
}

/**
 * Query the geometry (cyls, heads, sectors) of @bs.
 *
 * The request is forwarded to the driver's bdrv_probe_geometry callback,
 * which fills in @geo; its return value (0 on success, -errno on failure)
 * is returned unchanged.  -ENOTSUP is returned when @bs has no driver or
 * the driver lacks the callback.
 * @bs must not be empty.
 */
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_probe_geometry) {
        return -ENOTSUP;
    }

    return drv->bdrv_probe_geometry(bs, geo);
}

434 435 436 437 438
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the size of
 * that buffer in bytes.
 *
 * On Windows the file is created with GetTempFileName() in the directory
 * reported by GetTempPath(), and @size must be at least MAX_PATH.
 * Elsewhere the file is created with mkstemp() as "vl.XXXXXX" in $TMPDIR,
 * or in /var/tmp when TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value
 * (-EOVERFLOW when the name does not fit in @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * itself fail and overwrite errno with an unrelated value.
         */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
B
bellard 已提交
469

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Every registered driver that has a bdrv_probe_device callback is asked
 * to score @filename.  The first driver reaching the highest score above
 * zero is returned, or NULL if no driver scores above zero.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    BlockDriver *best = NULL, *candidate;
    int best_score = 0;

    QLIST_FOREACH(candidate, &bdrv_drivers, list) {
        int score;

        if (!candidate->bdrv_probe_device) {
            continue;
        }

        score = candidate->bdrv_probe_device(filename);
        if (score > best_score) {
            best_score = score;
            best = candidate;
        }
    }

    return best;
}

492
/*
 * Find the protocol driver to use for @filename.
 *
 * Host device detection comes first.  Then, unless @filename has a
 * "<protocol>:" prefix and @allow_protocol_prefix is true, the plain file
 * driver is returned.  Otherwise the registered driver whose protocol_name
 * matches the prefix (truncated to 127 characters) is returned.
 *
 * Returns NULL and sets @errp if the protocol is unknown.
 */
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix,
                                Error **errp)
{
    char protocol[128];
    const char *colon;
    size_t proto_len;
    BlockDriver *drv;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv = find_hdev_driver(filename);
    if (drv) {
        return drv;
    }

    if (!(path_has_protocol(filename) && allow_protocol_prefix)) {
        return &bdrv_file;
    }

    /* Extract the text before the first ':' as the protocol name */
    colon = strchr(filename, ':');
    assert(colon != NULL);
    proto_len = colon - filename;
    if (proto_len >= sizeof(protocol)) {
        proto_len = sizeof(protocol) - 1;
    }
    memcpy(protocol, filename, proto_len);
    protocol[proto_len] = '\0';

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        if (!drv->protocol_name) {
            continue;
        }
        if (strcmp(drv->protocol_name, protocol) == 0) {
            return drv;
        }
    }

    error_setg(errp, "Unknown protocol '%s'", protocol);
    return NULL;
}

537 538 539 540 541 542
/*
 * Guess image format by probing its contents.
 * This is not a good idea when your image is raw (CVE-2008-2004), but
 * we do it anyway for backward compatibility.
 *
 * @buf         contains the image's first @buf_size bytes.
543 544
 * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
 *              but can be smaller if the image file is smaller)
545 546 547 548 549 550
 * @filename    is its filename.
 *
 * For all block drivers, call the bdrv_probe() method to get its
 * probing score.
 * Return the first block driver with the highest probing score.
 */
551 552
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename)
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe) {
            score = d->bdrv_probe(buf, buf_size, filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

570
static int find_image_format(BlockDriverState *bs, const char *filename,
571
                             BlockDriver **pdrv, Error **errp)
572
{
573
    BlockDriver *drv;
574
    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
575
    int ret = 0;
576

577
    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
578
    if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
579
        *pdrv = &bdrv_raw;
580
        return ret;
581
    }
582

B
bellard 已提交
583 584
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
585 586
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
587 588
        *pdrv = NULL;
        return ret;
B
bellard 已提交
589 590
    }

591
    drv = bdrv_probe_all(buf, ret, filename);
592
    if (!drv) {
593 594
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
595 596 597 598
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
B
bellard 已提交
599 600
}

601 602
/**
 * Set the current 'total_sectors' value of @bs.
 *
 * If the driver has a bdrv_getlength callback, its result, rounded up to
 * whole sectors, is used; otherwise @hint is stored as is.  Nothing is
 * changed for scsi-generic devices.
 *
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;
    int64_t sectors = hint;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bdrv_is_sg(bs)) {
        return 0;
    }

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);

        if (length < 0) {
            return length;
        }
        sectors = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = sectors;
    return 0;
}

P
Paolo Bonzini 已提交
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645
/**
 * Set open flags for a given discard mode
 *
 * BDRV_O_UNMAP is first cleared in *@flags, then set again for the modes
 * "on" and "unmap".  The modes "off" and "ignore" leave it cleared.
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (strcmp(mode, "on") == 0 || strcmp(mode, "unmap") == 0) {
        *flags |= BDRV_O_UNMAP;
        return 0;
    }

    if (strcmp(mode, "off") == 0 || strcmp(mode, "ignore") == 0) {
        /* nothing to set */
        return 0;
    }

    return -1;
}

646 647 648 649 650 651 652 653 654 655 656
/**
 * Set open flags for a given cache mode
 *
 * All bits in BDRV_O_CACHE_MASK are first cleared in *@flags, then the bits
 * belonging to @mode are set:
 *   "off", "none"   BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   "directsync"    BDRV_O_NOCACHE
 *   "writeback"     BDRV_O_CACHE_WB
 *   "unsafe"        BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *   "writethrough"  none (this is the default)
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    int cache_bits;

    *flags &= ~BDRV_O_CACHE_MASK;

    if (strcmp(mode, "off") == 0 || strcmp(mode, "none") == 0) {
        cache_bits = BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (strcmp(mode, "directsync") == 0) {
        cache_bits = BDRV_O_NOCACHE;
    } else if (strcmp(mode, "writeback") == 0) {
        cache_bits = BDRV_O_CACHE_WB;
    } else if (strcmp(mode, "unsafe") == 0) {
        cache_bits = BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH;
    } else if (strcmp(mode, "writethrough") == 0) {
        /* this is the default */
        cache_bits = 0;
    } else {
        return -1;
    }

    *flags |= cache_bits;
    return 0;
}

673 674 675 676 677 678 679 680 681 682
/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file): BDRV_O_SNAPSHOT is dropped and BDRV_O_TEMPORARY
 * is added.
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    int snapshot_flags = flags & ~BDRV_O_SNAPSHOT;

    return snapshot_flags | BDRV_O_TEMPORARY;
}

683
/*
684 685
 * Returns the flags that bs->file should get if a protocol driver is expected,
 * based on the given flags for the parent BDS
686 687 688 689 690 691 692 693 694 695 696
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
697
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
698 699 700 701

    return flags;
}

702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
const BdrvChildRole child_file = {
    .inherit_flags = bdrv_inherited_flags,
};

/*
 * Returns the flags that bs->file should get if the use of formats (and not
 * only protocols) is permitted for it, based on the given flags for the parent
 * BDS.  These are the flags computed for child_file with BDRV_O_PROTOCOL
 * cleared again.
 */
static int bdrv_inherited_fmt_flags(int parent_flags)
{
    int file_flags = child_file.inherit_flags(parent_flags);

    file_flags &= ~BDRV_O_PROTOCOL;
    return file_flags;
}

/* Child role whose flags are derived by bdrv_inherited_fmt_flags() */
const BdrvChildRole child_format = {
    .inherit_flags = bdrv_inherited_fmt_flags,
};

K
Kevin Wolf 已提交
721
/*
722
 * Returns the flags that bs->backing should get, based on the given flags
K
Kevin Wolf 已提交
723 724 725 726 727 728 729 730
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
731
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
K
Kevin Wolf 已提交
732 733 734 735

    return flags;
}

736 737 738 739
static const BdrvChildRole child_backing = {
    .inherit_flags = bdrv_backing_flags,
};

K
Kevin Wolf 已提交
740 741 742 743 744 745 746 747
/*
 * Compute the flags that are handed to the driver's open callback from the
 * @flags requested for the node.  BDRV_O_CACHE_WB is always set, the flags
 * internal to the block layer are removed, and BDRV_O_RDWR is added when
 * BDRV_O_TEMPORARY is requested.  @bs is not used.
 */
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    /*
     * Flags that are internal to the block layer and must be cleared
     * before opening the image.
     */
    const int internal_flags = BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING |
                               BDRV_O_PROTOCOL;
    int open_flags = (flags | BDRV_O_CACHE_WB) & ~internal_flags;

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

760 761 762
/*
 * Give @bs the node name @node_name and insert it into graph_bdrv_states.
 *
 * A NULL @node_name requests a generated name.  An error is set in @errp,
 * and @bs is left unnamed, if a user-supplied name is not well-formed,
 * clashes with a device id, or is already used by another node.
 */
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    char *generated = NULL;

    if (node_name) {
        /*
         * Check for empty string or invalid characters, but not if it is
         * generated (generated names use characters not available to the user)
         */
        if (!id_wellformed(node_name)) {
            error_setg(errp, "Invalid node name");
            return;
        }
    } else {
        generated = id_generate(ID_BLOCK);
        node_name = generated;
    }

    if (blk_by_name(node_name)) {
        /* takes care of avoiding namespaces collisions */
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
    } else if (bdrv_find_node(node_name)) {
        /* takes care of avoiding duplicates node names */
        error_setg(errp, "Duplicate node name");
    } else {
        /* copy node name into the bs and insert it into the graph list */
        pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
    }

    g_free(generated);
}

797 798 799 800 801 802 803 804 805 806 807 808 809
/* Options that bdrv_open_common() absorbs from the options QDict itself */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};

810 811
/*
 * Common part for opening disk images and files
812 813
 *
 * Removes all processed options from *options.
814
 */
K
Kevin Wolf 已提交
815
static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
816
    QDict *options, int flags, BlockDriver *drv, Error **errp)
817 818
{
    int ret, open_flags;
K
Kevin Wolf 已提交
819
    const char *filename;
820
    const char *node_name = NULL;
821
    QemuOpts *opts;
822
    Error *local_err = NULL;
823 824

    assert(drv != NULL);
825
    assert(bs->file == NULL);
826
    assert(options != NULL && bs->options != options);
827

828
    if (file != NULL) {
K
Kevin Wolf 已提交
829
        filename = file->bs->filename;
830 831 832 833
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

834 835 836 837 838 839
    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

840
    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
S
Stefan Hajnoczi 已提交
841

842 843
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
844
    if (local_err) {
845
        error_propagate(errp, local_err);
846 847
        ret = -EINVAL;
        goto fail_opts;
848 849
    }

850
    node_name = qemu_opt_get(opts, "node-name");
851
    bdrv_assign_node_name(bs, node_name, &local_err);
852
    if (local_err) {
853
        error_propagate(errp, local_err);
854 855
        ret = -EINVAL;
        goto fail_opts;
856 857
    }

858
    bs->request_alignment = 512;
A
Asias He 已提交
859
    bs->zero_beyond_eof = true;
860 861 862 863
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
864 865 866 867 868
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
869 870
        ret = -ENOTSUP;
        goto fail_opts;
871
    }
872

873
    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
874 875 876 877 878
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
879 880
            ret = -EINVAL;
            goto fail_opts;
881
        }
882 883
    }

884 885 886 887 888
    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
M
Max Reitz 已提交
889
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
890 891

    bs->drv = drv;
892
    bs->opaque = g_malloc0(drv->instance_size);
893

894
    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
895

896 897
    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
898
        assert(file == NULL);
899
        assert(!drv->bdrv_needs_filename || filename != NULL);
900
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
901
    } else {
902
        if (file == NULL) {
903 904
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
905 906 907
            ret = -EINVAL;
            goto free_and_fail;
        }
908
        bs->file = file;
909
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
910 911
    }

912
    if (ret < 0) {
913
        if (local_err) {
914
            error_propagate(errp, local_err);
915 916
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
917 918 919
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
920 921 922
        goto free_and_fail;
    }

923 924 925 926 927 928 929
    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

930 931
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
932
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
933
        goto free_and_fail;
934
    }
935

936 937 938 939 940 941 942
    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

943
    assert(bdrv_opt_mem_align(bs) != 0);
944
    assert(bdrv_min_mem_align(bs) != 0);
945
    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
946 947

    qemu_opts_del(opts);
948 949 950
    return 0;

free_and_fail:
951
    bs->file = NULL;
952
    g_free(bs->opaque);
953 954
    bs->opaque = NULL;
    bs->drv = NULL;
955 956
fail_opts:
    qemu_opts_del(opts);
957 958 959
    return ret;
}

960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
/*
 * Parses the JSON text that follows the "json:" prefix of @filename.
 *
 * @filename must start with "json:" (asserted).
 *
 * Returns a flattened QDict on success. Returns NULL and sets @errp if the
 * text is not valid JSON or if the top-level JSON value is not an object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    const char *json_text = filename;
    QObject *parsed;
    QDict *result;
    int has_prefix;

    /* Skip the pseudo-protocol prefix; callers guarantee it is present */
    has_prefix = strstart(filename, "json:", &json_text);
    assert(has_prefix);

    parsed = qobject_from_json(json_text);
    if (parsed == NULL) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    /* Only a JSON object can be turned into an options dictionary */
    if (qobject_type(parsed) != QTYPE_QDICT) {
        qobject_decref(parsed);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    result = qobject_to_qdict(parsed);
    qdict_flatten(result);
    return result;
}

K
Kevin Wolf 已提交
987
/*
K
Kevin Wolf 已提交
988 989
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
990 991
 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
 * block driver has been specified explicitly.
K
Kevin Wolf 已提交
992
 */
993
static int bdrv_fill_options(QDict **options, const char **pfilename,
994
                             int *flags, Error **errp)
B
bellard 已提交
995
{
996
    const char *filename = *pfilename;
997
    const char *drvname;
998
    bool protocol = *flags & BDRV_O_PROTOCOL;
999
    bool parse_filename = false;
1000
    BlockDriver *drv = NULL;
1001
    Error *local_err = NULL;
B
bellard 已提交
1002

1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

1018
    drvname = qdict_get_try_str(*options, "driver");
1019 1020 1021 1022 1023 1024 1025 1026 1027
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
            return -ENOENT;
        }
        /* If the user has explicitly specified the driver, this choice should
         * override the BDRV_O_PROTOCOL flag */
        protocol = drv->bdrv_file_open;
1028 1029 1030 1031 1032 1033 1034 1035
    }

    if (protocol) {
        *flags |= BDRV_O_PROTOCOL;
    } else {
        *flags &= ~BDRV_O_PROTOCOL;
    }

K
Kevin Wolf 已提交
1036
    /* Fetch the file name from the options QDict if necessary */
1037
    if (protocol && filename) {
K
Kevin Wolf 已提交
1038 1039 1040 1041 1042 1043 1044 1045
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
K
Kevin Wolf 已提交
1046 1047
    }

1048
    /* Find the right block driver */
K
Kevin Wolf 已提交
1049 1050
    filename = qdict_get_try_str(*options, "filename");

1051 1052 1053
    if (!drvname && protocol) {
        if (filename) {
            drv = bdrv_find_protocol(filename, parse_filename, errp);
1054
            if (!drv) {
1055
                return -EINVAL;
1056
            }
1057 1058 1059 1060 1061 1062

            drvname = drv->format_name;
            qdict_put(*options, "driver", qstring_from_str(drvname));
        } else {
            error_setg(errp, "Must specify either driver or file");
            return -EINVAL;
1063
        }
1064 1065
    }

1066
    assert(drv || !protocol);
1067

K
Kevin Wolf 已提交
1068
    /* Driver-specific filename parsing */
1069
    if (drv && drv->bdrv_parse_filename && parse_filename) {
1070
        drv->bdrv_parse_filename(filename, *options, &local_err);
1071
        if (local_err) {
1072
            error_propagate(errp, local_err);
K
Kevin Wolf 已提交
1073
            return -EINVAL;
1074
        }
1075 1076 1077 1078

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
1079 1080
    }

1081 1082 1083 1084
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        *flags |= BDRV_O_INCOMING;
    }

K
Kevin Wolf 已提交
1085 1086 1087
    return 0;
}

K
Kevin Wolf 已提交
1088 1089 1090
/*
 * Allocates a BdrvChild that links @child_bs to @parent_bs with the given
 * @child_role and inserts it into both parent_bs->children and
 * child_bs->parents.
 *
 * No reference to @child_bs is taken here.
 *
 * Returns the newly allocated BdrvChild.
 */
static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                                    BlockDriverState *child_bs,
                                    const BdrvChildRole *child_role)
{
    /* Zero-initialised so every field not set below starts out cleared */
    BdrvChild *link = g_new0(BdrvChild, 1);

    link->bs = child_bs;
    link->role = child_role;

    QLIST_INSERT_HEAD(&parent_bs->children, link, next);
    QLIST_INSERT_HEAD(&child_bs->parents, link, next_parent);

    return link;
}

1104
/*
 * Removes @child from the two lists it was inserted into (the parent's
 * children list via 'next' and the child node's parents list via
 * 'next_parent') and frees it. The child node itself is not unreferenced.
 */
static void bdrv_detach_child(BdrvChild *child)
{
    /* Unlink from the child node's list of parents ... */
    QLIST_REMOVE(child, next_parent);
    /* ... and from the parent's list of children */
    QLIST_REMOVE(child, next);

    g_free(child);
}

/*
 * Detaches @child from @parent and drops one reference to the node it
 * pointed to. If that node inherited from @parent, the inheritance link is
 * cleared first. A NULL @child is a no-op.
 */
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
    BlockDriverState *node;

    if (!child) {
        return;
    }

    /* Remember the node: 'child' is freed by bdrv_detach_child() */
    node = child->bs;

    if (node->inherits_from == parent) {
        node->inherits_from = NULL;
    }

    bdrv_detach_child(child);
    bdrv_unref(node);
}

1128 1129 1130 1131
/*
 * Sets the backing file link of a BDS. A new reference is created; callers
 * which don't need their own reference any more must call bdrv_unref().
 */
F
Fam Zheng 已提交
1132 1133
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
1134 1135 1136
    if (backing_hd) {
        bdrv_ref(backing_hd);
    }
F
Fam Zheng 已提交
1137

1138
    if (bs->backing) {
1139
        assert(bs->backing_blocker);
1140
        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1141
        bdrv_unref_child(bs, bs->backing);
1142 1143
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
1144 1145
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
1146 1147
    }

F
Fam Zheng 已提交
1148
    if (!backing_hd) {
1149 1150
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
1151
        bs->backing = NULL;
F
Fam Zheng 已提交
1152 1153
        goto out;
    }
1154
    bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
F
Fam Zheng 已提交
1155 1156 1157 1158
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");
1159

1160
    bdrv_op_block_all(backing_hd, bs->backing_blocker);
1161
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
1162
    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1163
                    bs->backing_blocker);
F
Fam Zheng 已提交
1164
out:
1165
    bdrv_refresh_limits(bs, NULL);
F
Fam Zheng 已提交
1166 1167
}

1168 1169 1170 1171 1172 1173 1174 1175
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
1176
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
P
Paolo Bonzini 已提交
1177
{
1178
    char *backing_filename = g_malloc0(PATH_MAX);
K
Kevin Wolf 已提交
1179
    int ret = 0;
F
Fam Zheng 已提交
1180
    BlockDriverState *backing_hd;
1181
    Error *local_err = NULL;
P
Paolo Bonzini 已提交
1182

1183
    if (bs->backing != NULL) {
1184
        QDECREF(options);
1185
        goto free_exit;
P
Paolo Bonzini 已提交
1186 1187
    }

1188 1189 1190 1191 1192
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

P
Paolo Bonzini 已提交
1193
    bs->open_flags &= ~BDRV_O_NO_BACKING;
1194 1195 1196
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1197
        QDECREF(options);
1198
        goto free_exit;
F
Fam Zheng 已提交
1199
    } else {
1200 1201 1202 1203 1204 1205 1206 1207
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
P
Paolo Bonzini 已提交
1208 1209
    }

1210 1211 1212 1213 1214 1215 1216
    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

1217
    backing_hd = bdrv_new();
F
Fam Zheng 已提交
1218

1219 1220
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
P
Paolo Bonzini 已提交
1221 1222
    }

1223
    assert(bs->backing == NULL);
1224 1225
    ret = bdrv_open_inherit(&backing_hd,
                            *backing_filename ? backing_filename : NULL,
1226
                            NULL, options, 0, bs, &child_backing, &local_err);
P
Paolo Bonzini 已提交
1227
    if (ret < 0) {
F
Fam Zheng 已提交
1228 1229
        bdrv_unref(backing_hd);
        backing_hd = NULL;
P
Paolo Bonzini 已提交
1230
        bs->open_flags |= BDRV_O_NO_BACKING;
1231 1232 1233
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
1234
        goto free_exit;
P
Paolo Bonzini 已提交
1235
    }
1236

1237 1238
    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
F
Fam Zheng 已提交
1239
    bdrv_set_backing_hd(bs, backing_hd);
1240
    bdrv_unref(backing_hd);
P
Peter Feiner 已提交
1241

1242 1243 1244
free_exit:
    g_free(backing_filename);
    return ret;
P
Paolo Bonzini 已提交
1245 1246
}

M
Max Reitz 已提交
1247 1248 1249 1250 1251
/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is false and no
K
Kevin Wolf 已提交
1252
 * BlockdevRef is given. NULL will be returned, but errp remains unset.
M
Max Reitz 已提交
1253 1254 1255 1256 1257 1258 1259 1260
 *
 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 */
K
Kevin Wolf 已提交
1261 1262 1263 1264 1265
BdrvChild *bdrv_open_child(const char *filename,
                           QDict *options, const char *bdref_key,
                           BlockDriverState* parent,
                           const BdrvChildRole *child_role,
                           bool allow_none, Error **errp)
M
Max Reitz 已提交
1266
{
K
Kevin Wolf 已提交
1267 1268
    BdrvChild *c = NULL;
    BlockDriverState *bs;
M
Max Reitz 已提交
1269 1270 1271 1272 1273
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

1274
    assert(child_role != NULL);
1275

M
Max Reitz 已提交
1276 1277 1278 1279 1280 1281
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
K
Kevin Wolf 已提交
1282
        if (!allow_none) {
M
Max Reitz 已提交
1283 1284 1285
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
        }
1286
        QDECREF(image_options);
M
Max Reitz 已提交
1287 1288 1289
        goto done;
    }

K
Kevin Wolf 已提交
1290 1291
    bs = NULL;
    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1292
                            parent, child_role, errp);
1293 1294 1295 1296
    if (ret < 0) {
        goto done;
    }

K
Kevin Wolf 已提交
1297
    c = bdrv_attach_child(parent, bs, child_role);
M
Max Reitz 已提交
1298 1299 1300

done:
    qdict_del(options, bdref_key);
K
Kevin Wolf 已提交
1301 1302 1303
    return c;
}

1304
/*
 * Creates a temporary qcow2 overlay sized like @bs, opens it with @flags and
 * appends it on top of @bs via bdrv_append().
 *
 * Returns a non-negative value on success. On failure returns a negative
 * errno value and sets @errp.
 */
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    QemuOpts *opts = NULL;
    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err = NULL;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }

    /* Create the temporary image */
    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        goto out;
    }

    opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
                            &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
    ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
    qemu_opts_del(opts);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not create temporary overlay "
                         "'%s': %s", tmp_filename,
                         error_get_pretty(local_err));
        error_free(local_err);
        goto out;
    }

    /* Prepare a new options QDict for the temporary file */
    snapshot_options = qdict_new();
    qdict_put(snapshot_options, "file.driver",
              qstring_from_str("file"));
    qdict_put(snapshot_options, "file.filename",
              qstring_from_str(tmp_filename));
    qdict_put(snapshot_options, "driver",
              qstring_from_str("qcow2"));

    bs_snapshot = bdrv_new();

    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                    flags, &local_err);
    if (ret < 0) {
        /* bdrv_open() does not release a BDS supplied by the caller when it
         * fails, so drop the reference obtained from bdrv_new() here to
         * avoid leaking the node (same as bdrv_open_backing_file()). */
        bdrv_unref(bs_snapshot);
        error_propagate(errp, local_err);
        goto out;
    }

    bdrv_append(bs_snapshot, bs);

out:
    g_free(tmp_filename);
    return ret;
}

K
Kevin Wolf 已提交
1371 1372
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
1373 1374 1375 1376 1377
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1378 1379 1380
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
1381 1382 1383 1384
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
K
Kevin Wolf 已提交
1385
 */
1386 1387 1388
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
1389
                             const BdrvChildRole *child_role, Error **errp)
B
bellard 已提交
1390
{
K
Kevin Wolf 已提交
1391
    int ret;
K
Kevin Wolf 已提交
1392 1393
    BdrvChild *file = NULL;
    BlockDriverState *bs;
1394
    BlockDriver *drv = NULL;
1395
    const char *drvname;
1396
    const char *backing;
1397
    Error *local_err = NULL;
1398
    int snapshot_flags = 0;
B
bellard 已提交
1399

1400
    assert(pbs);
1401 1402
    assert(!child_role || !flags);
    assert(!child_role == !parent);
1403

1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428
    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

1429 1430 1431
    if (*pbs) {
        bs = *pbs;
    } else {
1432
        bs = bdrv_new();
1433 1434
    }

1435 1436 1437 1438 1439
    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

1440
    if (child_role) {
1441
        bs->inherits_from = parent;
1442 1443 1444
        flags = child_role->inherit_flags(parent->open_flags);
    }

1445
    ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1446 1447 1448 1449
    if (local_err) {
        goto fail;
    }

1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
    /* Find the right image format driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));

1464 1465 1466 1467 1468 1469
    backing = qdict_get_try_str(options, "backing");
    if (backing && *backing == '\0') {
        flags |= BDRV_O_NO_BACKING;
        qdict_del(options, "backing");
    }

1470
    bs->open_flags = flags;
1471
    bs->options = options;
1472
    options = qdict_clone_shallow(options);
1473

1474
    /* Open image file without format layer */
1475 1476 1477 1478 1479 1480 1481 1482
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }
1483

1484
        bs->open_flags = flags;
K
Kevin Wolf 已提交
1485

K
Kevin Wolf 已提交
1486 1487
        file = bdrv_open_child(filename, options, "file", bs,
                               &child_file, true, &local_err);
K
Kevin Wolf 已提交
1488 1489
        if (local_err) {
            ret = -EINVAL;
1490 1491
            goto fail;
        }
1492 1493
    }

1494
    /* Image format probing */
1495
    bs->probed = !drv;
1496
    if (!drv && file) {
K
Kevin Wolf 已提交
1497
        ret = find_image_format(file->bs, filename, &drv, &local_err);
1498
        if (ret < 0) {
1499
            goto fail;
1500
        }
1501
    } else if (!drv) {
1502 1503
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
1504
        goto fail;
B
bellard 已提交
1505
    }
K
Kevin Wolf 已提交
1506

1507 1508 1509 1510 1511 1512
    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
    /* file must be NULL if a protocol BDS is about to be created
     * (the inverse results in an error message from bdrv_open_common()) */
    assert(!(flags & BDRV_O_PROTOCOL) || !file);

K
Kevin Wolf 已提交
1513
    /* Open the image */
1514
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
K
Kevin Wolf 已提交
1515
    if (ret < 0) {
1516
        goto fail;
1517 1518
    }

1519
    if (file && (bs->file != file)) {
K
Kevin Wolf 已提交
1520
        bdrv_unref_child(bs, file);
1521 1522 1523
        file = NULL;
    }

K
Kevin Wolf 已提交
1524
    /* If there is a backing file, use it */
P
Paolo Bonzini 已提交
1525
    if ((flags & BDRV_O_NO_BACKING) == 0) {
1526 1527
        QDict *backing_options;

1528
        qdict_extract_subqdict(options, &backing_options, "backing.");
1529
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
K
Kevin Wolf 已提交
1530
        if (ret < 0) {
1531
            goto close_and_fail;
K
Kevin Wolf 已提交
1532 1533 1534
        }
    }

M
Max Reitz 已提交
1535 1536
    bdrv_refresh_filename(bs);

1537
    /* Check if any unknown options were used */
1538
    if (options && (qdict_size(options) != 0)) {
1539
        const QDictEntry *entry = qdict_first(options);
1540 1541 1542 1543 1544 1545
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
1546
                       bdrv_get_device_name(bs), entry->key);
1547
        }
1548 1549 1550 1551 1552

        ret = -EINVAL;
        goto close_and_fail;
    }

K
Kevin Wolf 已提交
1553
    if (!bdrv_key_required(bs)) {
1554 1555 1556
        if (bs->blk) {
            blk_dev_change_media_cb(bs->blk, true);
        }
1557 1558 1559 1560 1561 1562 1563
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
K
Kevin Wolf 已提交
1564 1565
    }

1566
    QDECREF(options);
1567
    *pbs = bs;
1568 1569 1570 1571 1572 1573 1574 1575 1576 1577

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

K
Kevin Wolf 已提交
1578 1579
    return 0;

1580
fail:
1581
    if (file != NULL) {
K
Kevin Wolf 已提交
1582
        bdrv_unref_child(bs, file);
1583
    }
1584
    QDECREF(bs->options);
1585
    QDECREF(options);
1586
    bs->options = NULL;
1587 1588 1589 1590 1591 1592
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
1593
    if (local_err) {
1594 1595
        error_propagate(errp, local_err);
    }
1596
    return ret;
1597

1598
close_and_fail:
1599 1600 1601 1602 1603 1604
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
1605
    QDECREF(options);
1606
    if (local_err) {
1607 1608
        error_propagate(errp, local_err);
    }
K
Kevin Wolf 已提交
1609 1610 1611
    return ret;
}

1612
/*
 * Public entry point for opening a disk image: forwards to
 * bdrv_open_inherit() without a parent node or child role, so @flags is used
 * as given. See bdrv_open_inherit() for the meaning of the parameters and the
 * ownership of @options.
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags, Error **errp)
{
    BlockDriverState *no_parent = NULL;
    const BdrvChildRole *no_role = NULL;

    return bdrv_open_inherit(pbs, filename, reference, options, flags,
                             no_parent, no_role, errp);
}

1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636
/* One element of a BlockReopenQueue: the reopen request for a single node. */
typedef struct BlockReopenQueueEntry {
     /* Set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded
      * for this entry; only prepared entries are aborted on failure. */
     bool prepared;
     /* Target node together with its new options and flags */
     BDRVReopenState state;
     /* Linkage within the BlockReopenQueue */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;

/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
 * already performed, or alternatively may be NULL a new BlockReopenQueue will
 * be created and initialized. This newly created BlockReopenQueue should be
 * passed back in for subsequent calls that are intended to be of the same
 * atomic 'set'.
 *
 * bs is the BlockDriverState to add to the reopen queue.
 *
1637 1638 1639
 * options contains the changed options for the associated bs
 * (the BlockReopenQueue takes ownership)
 *
1640 1641 1642 1643 1644 1645 1646
 * flags contains the open flags for the associated bs
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1647 1648
                                    BlockDriverState *bs,
                                    QDict *options, int flags)
1649 1650 1651 1652
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
K
Kevin Wolf 已提交
1653
    BdrvChild *child;
1654
    QDict *old_options;
K
Kevin Wolf 已提交
1655

1656 1657 1658 1659 1660
    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QSIMPLEQ_INIT(bs_queue);
    }

1661 1662 1663 1664 1665 1666 1667 1668
    if (!options) {
        options = qdict_new();
    }

    old_options = qdict_clone_shallow(bs->options);
    qdict_join(options, old_options, false);
    QDECREF(old_options);

1669 1670 1671
    /* bdrv_open() masks this flag out */
    flags &= ~BDRV_O_PROTOCOL;

K
Kevin Wolf 已提交
1672 1673 1674 1675 1676 1677 1678 1679
    QLIST_FOREACH(child, &bs->children, next) {
        int child_flags;

        if (child->bs->inherits_from != bs) {
            continue;
        }

        child_flags = child->role->inherit_flags(flags);
1680 1681
        /* TODO Pass down child flags (backing.*, extents.*, ...) */
        bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1682 1683 1684 1685 1686 1687
    }

    bs_entry = g_new0(BlockReopenQueueEntry, 1);
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);

    bs_entry->state.bs = bs;
1688
    bs_entry->state.options = options;
1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740
    bs_entry->state.flags = flags;

    return bs_queue;
}

/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags.  All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
 * The queue and all of its entries are freed before returning, on success
 * as well as on failure.
 *
 * Returns 0 on success, -1 on failure (with errp set by the failing
 * prepare step).
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    BlockReopenQueueEntry *cur, *following;
    Error *err = NULL;
    int ret = -1;

    assert(bs_queue != NULL);

    bdrv_drain_all();

    /* Phase 1: stage the changes for every queued node */
    QSIMPLEQ_FOREACH(cur, bs_queue, entry) {
        if (bdrv_reopen_prepare(&cur->state, bs_queue, &err)) {
            error_propagate(errp, err);
            goto cleanup;
        }
        cur->prepared = true;
    }

    /* Phase 2: everything prepared fine, so apply the changes */
    QSIMPLEQ_FOREACH(cur, bs_queue, entry) {
        bdrv_reopen_commit(&cur->state);
    }

    ret = 0;

cleanup:
    /* Roll back prepared entries on failure and release the queue */
    QSIMPLEQ_FOREACH_SAFE(cur, bs_queue, entry, following) {
        if (ret != 0 && cur->prepared) {
            bdrv_reopen_abort(&cur->state);
        }
        QDECREF(cur->state.options);
        g_free(cur);
    }
    g_free(bs_queue);
    return ret;
}
}


/*
 * Reopen a single BlockDriverState with the specified flags.
 *
 * Builds a fresh reopen queue for @bs (with no option changes) and runs it
 * through bdrv_reopen_multiple(). Returns that function's result; on error
 * @errp is set.
 */
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
{
    Error *err = NULL;
    BlockReopenQueue *single = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
    int ret = bdrv_reopen_multiple(single, &err);

    if (err) {
        error_propagate(errp, err);
    }
    return ret;
}


/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * reopen_state holds the BlockDriverState to reopen together with the new
 *              open flags and options
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error.  On error errp will be set
 * as well.
 *
 * This function does not roll anything back itself.
 * NOTE(review): callers are presumably expected to call the abort() or
 * commit() function for the other BDS that have been left in a prepare()
 * state - confirm against bdrv_reopen_multiple().
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    const QDictEntry *opt;
    BlockDriver *drv;
    Error *err = NULL;
    int ret;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if ((reopen_state->flags & BDRV_O_RDWR) &&
        !(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR)) {
        error_setg(errp, "Node '%s' is read only",
                   bdrv_get_device_or_node_name(reopen_state->bs));
        return -1;
    }

    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        return ret;
    }

    /* It is currently mandatory to have a bdrv_reopen_prepare()
     * handler for each supported drv. */
    if (!drv->bdrv_reopen_prepare) {
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        return -1;
    }

    ret = drv->bdrv_reopen_prepare(reopen_state, queue, &err);
    if (ret) {
        if (err != NULL) {
            error_propagate(errp, err);
        } else {
            /* The driver failed without giving a reason of its own */
            error_setg(errp, "failed while preparing to reopen image '%s'",
                       reopen_state->bs->filename);
        }
        return ret;
    }

    /* Options that are not handled are only okay if they are unchanged
     * compared to the old state. It is expected that some options are only
     * used for the initial open, but not reopen (e.g. filename) */
    for (opt = qdict_first(reopen_state->options);
         opt != NULL;
         opt = qdict_next(reopen_state->options, opt)) {
        QString *new_qstr = qobject_to_qstring(opt->value);
        const char *new_str = qstring_get_str(new_qstr);
        const char *old_str = qdict_get_try_str(reopen_state->bs->options,
                                                opt->key);

        if (old_str == NULL || strcmp(new_str, old_str) != 0) {
            error_setg(errp, "Cannot change the option '%s'", opt->key);
            return -EINVAL;
        }
    }

    return 0;
}

/*
 * Makes the changes staged by bdrv_reopen_prepare() final: lets the driver
 * commit its own staged state (if it has a .bdrv_reopen_commit handler),
 * then updates the open flags, write-cache setting and read-only state of
 * the BlockDriverState from the new flags and refreshes its limits.
 */
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;
    BlockDriverState *bs;
    int new_flags;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now (read only after the driver has run) */
    bs = reopen_state->bs;
    new_flags = reopen_state->flags;

    bs->open_flags = new_flags;
    bs->enable_write_cache = !!(new_flags & BDRV_O_CACHE_WB);
    bs->read_only = !(new_flags & BDRV_O_RDWR);

    bdrv_refresh_limits(bs, NULL);
}

/*
 * Abort a staged reopen: hand reopen_state to the driver's
 * bdrv_reopen_abort() callback, if it has one, so that it can drop the
 * changes it staged.
 *
 * @reopen_state and the driver of its BDS must not be NULL (asserted).
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *driver;

    assert(reopen_state != NULL);
    driver = reopen_state->bs->drv;
    assert(driver != NULL);

    if (!driver->bdrv_reopen_abort) {
        return;
    }
    driver->bdrv_reopen_abort(reopen_state);
}


B
bellard 已提交
1901 1902
/*
 * Close @bs.
 *
 * The steps run in a fixed order: cancel a running block job, disable
 * throttling, drain and flush outstanding I/O, fire the close notifiers and
 * tell an attached BlockBackend about the media change.  If a driver is
 * attached, its close callback is invoked, the backing file, bs->file and
 * all children are detached, and the per-image fields are reset.  Finally
 * all AIO notifiers are freed.
 */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *notifier, *notifier_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }

    /* Turn off I/O limits and drain all pending throttled requests */
    if (bs->throttle_state) {
        bdrv_io_limits_disable(bs);
    }

    bdrv_drain(bs); /* let in-flight I/O complete */
    bdrv_flush(bs);
    bdrv_drain(bs); /* the flush may have left pending I/O behind */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    if (bs->drv) {
        BdrvChild *child, *child_next;

        bs->drv->bdrv_close(bs);
        bs->drv = NULL;

        bdrv_set_backing_hd(bs, NULL);

        if (bs->file != NULL) {
            bdrv_unref_child(bs, bs->file);
            bs->file = NULL;
        }

        QLIST_FOREACH_SAFE(child, &bs->children, next, child_next) {
            /* TODO Remove bdrv_unref() from drivers' close function and use
             * bdrv_unref_child() here */
            if (child->bs->inherits_from == bs) {
                child->bs->inherits_from = NULL;
            }
            bdrv_detach_child(child);
        }

        /* Drop the driver's private state */
        g_free(bs->opaque);
        bs->opaque = NULL;

        /* Reset the fields that described the image just closed */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;

        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;
    }

    /* Free every notifier, then reinitialize the list head */
    QLIST_FOREACH_SAFE(notifier, &bs->aio_notifiers, list, notifier_next) {
        g_free(notifier);
    }
    QLIST_INIT(&bs->aio_notifiers);
}

1967 1968 1969 1970
void bdrv_close_all(void)
{
    BlockDriverState *bs;

1971
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1972 1973 1974
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
1975
        bdrv_close(bs);
1976
        aio_context_release(aio_context);
1977 1978 1979
    }
}

1980 1981
/*
 * Make a BlockDriverState anonymous by taking it off the bdrv_states and
 * graph_bdrv_states lists.  The node name is emptied afterwards, which
 * prevents a second removal from graph_bdrv_states on a repeated call.
 */
void bdrv_make_anon(BlockDriverState *bs)
{
    /*
     * Only unlink bs from bdrv_states when it really is on that list.
     * bs->device_list.tqe_prev starts out null and is made non-null by
     * QTAILQ_INSERT_TAIL().  Resetting it to null on removal maintains the
     * invariant "bs is in bdrv_states iff bs->device_list.tqe_prev is set".
     */
    if (bs->device_list.tqe_prev != NULL) {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
        bs->device_list.tqe_prev = NULL;
    }

    /* A non-empty node name marks membership in graph_bdrv_states */
    if (*bs->node_name) {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
        bs->node_name[0] = '\0';
    }
}

K
Kevin Wolf 已提交
2002
/* Fields that need to stay with the top-level BDS */
2003 2004
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
2005
{
2006
    /* move some fields that need to stay attached to the device */
2007 2008

    /* dev info */
2009
    bs_dest->copy_on_read       = bs_src->copy_on_read;
2010

2011
    bs_dest->enable_write_cache = bs_src->enable_write_cache;
2012

2013
    /* dirty bitmap */
F
Fam Zheng 已提交
2014
    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2015
}
2016

2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055
/*
 * Make every parent of @from point at @to instead.
 *
 * Each BdrvChild on from->parents is moved to to->parents; for every moved
 * link a reference on @to is taken and one on @from is dropped.  None of the
 * links may have the child_backing role (asserted).
 *
 * If @from has a BlockBackend, blk_set_bs() is called to attach it to @to,
 * @to is inserted into bdrv_states right before @from unless it is already
 * on a list, and @from is removed from bdrv_states.
 */
static void change_parent_backing_link(BlockDriverState *from,
                                       BlockDriverState *to)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->role != &child_backing);
        c->bs = to;
        QLIST_REMOVE(c, next_parent);
        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
        bdrv_ref(to);
        bdrv_unref(from);
    }
    if (from->blk) {
        blk_set_bs(from->blk, to);
        if (!to->device_list.tqe_prev) {
            QTAILQ_INSERT_BEFORE(from, to, device_list);
        }
        QTAILQ_REMOVE(&bdrv_states, from, device_list);
        /*
         * Preserve the invariant "bs is in bdrv_states iff
         * bs->device_list.tqe_prev is non-null" that bdrv_make_anon() and
         * the check above depend on.  With a stale pointer left behind, a
         * later bdrv_make_anon(from) would unlink @from a second time.
         */
        from->device_list.tqe_prev = NULL;
    }
}

/*
 * Exchange the top-level feature fields between @bs_top and @bs_new, and
 * if @bs_top is throttled, enable I/O limits on @bs_new with the same
 * throttle group name before disabling them on @bs_top.
 *
 * @bs_new must not have a throttle state yet (asserted).
 */
static void swap_feature_fields(BlockDriverState *bs_top,
                                BlockDriverState *bs_new)
{
    BlockDriverState scratch;

    /* Classic three-step swap; only the feature fields of scratch are used */
    bdrv_move_feature_fields(&scratch, bs_top);
    bdrv_move_feature_fields(bs_top, bs_new);
    bdrv_move_feature_fields(bs_new, &scratch);

    assert(!bs_new->throttle_state);
    if (!bs_top->throttle_state) {
        return;
    }

    assert(bs_top->io_limits_enabled);
    bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
    bdrv_io_limits_disable(bs_top);
}

2056 2057 2058 2059 2060 2061 2062
/*
 * Insert @bs_new on top of @bs_top in a live image chain.
 *
 * The parents of @bs_top are redirected to @bs_new, the fields that must
 * stay on the top layer are swapped between the two nodes, and @bs_top
 * becomes the backing file of @bs_new.  Both nodes are modified.
 *
 * Neither node may have requests pending (asserted).
 * bs_new must not be attached to a BlockBackend.
 *
 * No image files are created by this function.
 *
 * One reference to @bs_new is consumed: after the call @bs_new is referenced
 * by the former parents of @bs_top.  A caller that wants to keep its own
 * reference has to take one with bdrv_ref().
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Hold a temporary reference on bs_top while its parents are moved */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always live on the topmost node of the chain */
    swap_feature_fields(bs_top, bs_new);

    bdrv_set_backing_hd(bs_new, bs_top);
    bdrv_unref(bs_top);

    /* The new parents reference bs_new now; give up the reference that
     * was passed in by the caller. */
    bdrv_unref(bs_new);
}

2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120
/*
 * Replace @old by @new for all parents of @old.
 *
 * If @old has a BlockBackend, the top-level feature fields are swapped
 * between the two nodes first (@new must not have a BlockBackend then).
 * If @new has no backing file and is not already part of the backing chain
 * below @old, it also takes over the backing file of @old.
 *
 * Neither node may have requests pending (asserted).
 */
void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
{
    assert(!bdrv_requests_pending(old));
    assert(!bdrv_requests_pending(new));

    /* Temporary reference held until the end of the function */
    bdrv_ref(old);

    if (old->blk) {
        /* As long as these fields live in the top-level BlockDriverState
         * rather than in BlockBackend, a BDS cannot have two BBs.
         *
         * What we want is a copy from old to new, but a swap is used so
         * that no pointer ends up duplicated and causing trouble.
         * (bdrv_swap() used to behave the same way.) */
        assert(!new->blk);
        swap_feature_fields(old, new);
    }
    change_parent_backing_link(old, new);

    /* Move the backing file over when a previously independent node joins
     * the chain.  For active commit, top is replaced by its own (indirect)
     * backing file; nothing is done here then, or a loop would result. */
    if (!new->backing && !bdrv_chain_contains(backing_bs(old), new)) {
        bdrv_set_backing_hd(new, backing_bs(old));
        bdrv_set_backing_hd(old, NULL);
    }

    bdrv_unref(old);
}

F
Fam Zheng 已提交
2121
/*
 * Destroy @bs: close it, take it off the global lists and free it.
 *
 * The node must have no job, no op blockers, a reference count of zero and
 * no dirty bitmaps (all asserted).
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* unlink from the lists if the node is still on them */
    bdrv_make_anon(bs);

    g_free(bs);
}

A
aliguori 已提交
2136 2137 2138
/*
 * Run consistency checks on an image
 *
2139
 * Returns 0 if the check could be completed (it doesn't mean that the image is
2140
 * free of errors) or -errno when an internal error occurred. The results of the
2141
 * check are stored in res.
A
aliguori 已提交
2142
 */
2143
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
A
aliguori 已提交
2144
{
2145 2146 2147
    if (bs->drv == NULL) {
        return -ENOMEDIUM;
    }
A
aliguori 已提交
2148 2149 2150 2151
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

2152
    memset(res, 0, sizeof(*res));
2153
    return bs->drv->bdrv_check(bs, res, fix);
A
aliguori 已提交
2154 2155
}

2156 2157
#define COMMIT_BUF_SECTORS 2048

2158 2159 2160
/*
 * Commit the COW image @bs into its backing image.
 *
 * The allocated parts of @bs are copied into the backing image in chunks of
 * at most COMMIT_BUF_SECTORS sectors.  A read-only backing image is reopened
 * read-write for the duration of the copy and reopened with its original
 * flags afterwards.  If the driver provides bdrv_make_empty(), @bs is
 * emptied once the copy is done.
 *
 * Returns 0 on success, -ENOMEDIUM without a driver, -ENOTSUP without a
 * backing file, -EBUSY if the operation is blocked on either node, -EACCES
 * if the backing image cannot be reopened read-write, -ENOMEM if the copy
 * buffer cannot be allocated, or the negative value returned by the step
 * that failed.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int chunk, backing_ro, backing_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv) {
        return -ENOMEDIUM;
    }

    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET,
                           NULL)) {
        return -EBUSY;
    }

    /* Remember how the backing image was opened so it can be restored */
    backing_ro = bs->backing->bs->read_only;
    backing_flags = bs->backing->bs->open_flags;

    if (backing_ro &&
        bdrv_reopen(bs->backing->bs, backing_flags | BDRV_O_RDWR, NULL)) {
        return -EACCES;
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing->bs);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* A top image that is larger than the backing image requires the
     * backing image to grow; if it cannot, the commit fails. */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing->bs, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing->bs as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    sector = 0;
    while (sector < total_sectors) {
        /* chunk receives the number of sectors covered by the answer */
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &chunk);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, chunk);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing->bs, sector, buf, chunk);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
        sector += chunk;
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Flush the backing device so that everything written to it above is
     * stable on disk.
     */
    if (bs->backing) {
        bdrv_flush(bs->backing->bs);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (backing_ro) {
        /* best effort: the return value of this reopen is ignored */
        bdrv_reopen(bs->backing->bs, backing_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}

2266
int bdrv_commit_all(void)
2267 2268 2269
{
    BlockDriverState *bs;

2270
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2271 2272 2273
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
2274
        if (bs->drv && bs->backing) {
2275 2276
            int ret = bdrv_commit(bs);
            if (ret < 0) {
2277
                aio_context_release(aio_context);
2278 2279
                return ret;
            }
2280
        }
2281
        aio_context_release(aio_context);
2282
    }
2283
    return 0;
2284 2285
}

K
Kevin Wolf 已提交
2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297
/*
 * Ask the format driver to record a new backing file (and format) for @bs
 * and, on success, mirror the new values in bs->backing_file and
 * bs->backing_format.  A NULL @backing_file or @backing_fmt is stored as an
 * empty string.
 *
 * Return values:
 * 0          - success
 * -EINVAL    - backing format specified, but no file
 * -ENOMEDIUM - @bs has no driver
 * -ENOSPC    - can't update the backing file because no space is left in the
 *              image file header
 * -ENOTSUP   - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;
    int ret;

    /* Backing file format doesn't make sense without a backing file */
    if (backing_fmt && !backing_file) {
        return -EINVAL;
    }

    /* Without a driver there is nothing to dispatch to; do not dereference
     * a NULL drv below. */
    if (!drv) {
        return -ENOMEDIUM;
    }

    if (drv->bdrv_change_backing_file != NULL) {
        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        ret = -ENOTSUP;
    }

    if (ret == 0) {
        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
    }
    return ret;
}

2318 2319 2320 2321 2322 2323 2324
/*
 * Find the layer of the image chain whose backing file is 'bs'.
 *
 * 'active' is the current topmost image; the search walks down its backing
 * chain.
 *
 * Returns NULL if bs is not found in active's image chain, or if
 * active == bs.
 *
 * For bs == NULL the bottommost base image is returned.
 */
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                    BlockDriverState *bs)
{
    BlockDriverState *layer;

    for (layer = active; layer; layer = backing_bs(layer)) {
        if (backing_bs(layer) == bs) {
            break;
        }
    }

    return layer;
}
2337

2338 2339 2340 2341
/* Walk down the backing chain of @bs and return its bottommost image
 * (NULL if @bs is NULL). */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    BlockDriverState *base = bs;

    while (base && backing_bs(base) != NULL) {
        base = backing_bs(base);
    }
    return base;
}

/*
 * Drop the images above 'base' up to and including 'top', and make 'base'
 * the backing file of the image directly above 'top'.
 *
 * The overlay of 'top' has to be opened r/w so that its backing file
 * information can be updated.
 *
 * For example, the chain
 *
 *     bottom <- base <- intermediate <- top <- active
 *
 * becomes
 *
 *     bottom <- base <- active
 *
 * bottom == base is allowed, in which case
 *
 *     base <- intermediate <- top <- active
 *
 * becomes
 *
 *     base <- active
 *
 * A non-NULL backing_file_str is the name written to the metadata of top's
 * overlay; otherwise base->filename is used.
 *
 * Returns 0 on success (including the case where the overlay of 'top'
 * already has 'base' as its backing file), the non-zero result of
 * bdrv_change_backing_file() if that fails, and -EIO otherwise: when 'top'
 * or 'base' has no driver, when no overlay of 'top' is found below 'active'
 * (this includes active == top), or when 'base' is not in the backing chain
 * of 'top'.
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *overlay;
    int ret;

    if (!top->drv || !base->drv) {
        return -EIO;
    }

    overlay = bdrv_find_overlay(active, top);
    if (overlay == NULL) {
        /* no image sits above 'top'; that is an error */
        return -EIO;
    }

    /* The overlay already points at base: there are no intermediate images
     * and nothing is left to do. */
    if (backing_bs(overlay) == base) {
        return 0;
    }

    /* base has to be part of the backing chain of top */
    if (!bdrv_chain_contains(top, base)) {
        return -EIO;
    }

    /* All checks passed: rewrite the overlay's metadata, then relink it */
    backing_file_str = backing_file_str ? backing_file_str : base->filename;
    ret = bdrv_change_backing_file(overlay, backing_file_str,
                                   base->drv ? base->drv->format_name : "");
    if (ret) {
        return ret;
    }
    bdrv_set_backing_hd(overlay, base);

    return 0;
}

2416 2417 2418 2419
/**
 * Truncate file to 'offset' bytes (needed only for file protocols).
 *
 * After a successful driver truncate the cached sector count is refreshed,
 * the dirty bitmaps are truncated and an attached BlockBackend is told
 * about the resize.
 *
 * Returns -ENOMEDIUM without a driver, -ENOTSUP if the driver cannot
 * truncate, -EACCES for a read-only node, the driver's result if it is
 * non-zero, and otherwise the result of refresh_total_sectors().
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_truncate) {
        return -ENOTSUP;
    }
    if (bs->read_only) {
        return -EACCES;
    }

    ret = drv->bdrv_truncate(bs, offset);
    if (ret != 0) {
        return ret;
    }

    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
    bdrv_dirty_bitmap_truncate(bs);
    if (bs->blk) {
        blk_dev_resize_cb(bs->blk);
    }
    return ret;
}

2441 2442 2443 2444 2445
/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 *
 * The query is answered by the first node, starting at @bs and following
 * bs->file, whose driver implements bdrv_get_allocated_file_size().
 * Returns -ENOMEDIUM when a node without driver is reached and -ENOTSUP
 * when the chain ends without such a driver.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    for (;;) {
        BlockDriver *drv = bs->drv;

        if (!drv) {
            return -ENOMEDIUM;
        }
        if (drv->bdrv_get_allocated_file_size) {
            return drv->bdrv_get_allocated_file_size(bs);
        }
        if (!bs->file) {
            return -ENOTSUP;
        }
        /* delegate to the underlying file node */
        bs = bs->file->bs;
    }
}
2459

2460 2461
/**
 * Return number of sectors on success, -errno on error.
2462
 */
2463
int64_t bdrv_nb_sectors(BlockDriverState *bs)
2464
{
2465
    BlockDriver *drv = bs->drv;
2466

2467 2468
    if (!drv)
        return -ENOMEDIUM;
2469

2470 2471 2472 2473
    if (drv->has_variable_length) {
        int ret = refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            return ret;
2474 2475
        }
    }
2476
    return bs->total_sectors;
2477
}
B
bellard 已提交
2478

2479 2480 2481
/**
 * Return length in bytes on success, -errno on error.
 * The length is always a multiple of BDRV_SECTOR_SIZE.
2482
 */
2483
int64_t bdrv_getlength(BlockDriverState *bs)
2484
{
2485
    int64_t ret = bdrv_nb_sectors(bs);
2486

2487
    ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2488
    return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
B
bellard 已提交
2489 2490
}

2491 2492
/* Store the number of sectors of @bs in *nb_sectors_ptr; 0 is stored if no
 * device is present or an error occurred. */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t sectors = bdrv_nb_sectors(bs);

    if (sectors < 0) {
        sectors = 0;
    }
    *nb_sectors_ptr = sectors;
}

2499
/* Return the read_only flag of @bs. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    int read_only = bs->read_only;

    return read_only;
}

2504
/* Return the sg flag of @bs. */
int bdrv_is_sg(BlockDriverState *bs)
{
    int sg = bs->sg;

    return sg;
}

2509
/* Return the enable_write_cache setting of @bs. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    int wce = bs->enable_write_cache;

    return wce;
}

2514
/*
 * Set the write cache setting of @bs to @wce and keep BDRV_O_CACHE_WB in
 * bs->open_flags in sync with it, so that a reopen() preserves the setting.
 */
void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
{
    bs->enable_write_cache = wce;

    /* mirror the setting in the open flags for later reopens */
    if (!wce) {
        bs->open_flags &= ~BDRV_O_CACHE_WB;
    } else {
        bs->open_flags |= BDRV_O_CACHE_WB;
    }
}

2526
/* Return 1 if the backing file of @bs is encrypted; otherwise return the
 * encrypted flag of @bs itself. */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    BdrvChild *backing = bs->backing;

    if (backing != NULL && backing->bs->encrypted) {
        return 1;
    }
    return bs->encrypted;
}

2534
/* Return non-zero if either the backing file of @bs or @bs itself is
 * encrypted and does not have a valid key yet. */
int bdrv_key_required(BlockDriverState *bs)
{
    BdrvChild *backing = bs->backing;

    /* the backing file is checked first */
    if (backing != NULL) {
        BlockDriverState *backing_node = backing->bs;

        if (backing_node->encrypted && !backing_node->valid_key) {
            return 1;
        }
    }
    return (bs->encrypted && !bs->valid_key);
}

2544
/*
 * Set the encryption key of @bs.
 *
 * An encrypted backing file receives the key first; if @bs itself is not
 * encrypted, the result of that step is the final result.
 *
 * Returns -EINVAL if @bs is not encrypted (and no encrypted backing file
 * took the key), -ENOMEDIUM if @bs has no driver or the driver has no
 * bdrv_set_key callback, and otherwise the value returned by the driver.
 * The first time a key is accepted, an attached BlockBackend is notified
 * of the media change.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;

    if (bs->backing && bs->backing->bs->encrypted) {
        ret = bdrv_set_key(bs->backing->bs, key);
        if (ret < 0) {
            return ret;
        }
        if (!bs->encrypted) {
            return 0;
        }
    }

    if (!bs->encrypted) {
        return -EINVAL;
    }
    if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }

    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
        return ret;
    }

    if (!bs->valid_key) {
        bs->valid_key = 1;
        if (bs->blk) {
            /* call the change callback now, we skipped it on open */
            blk_dev_change_media_cb(bs->blk, true);
        }
    }
    return ret;
}
2571

2572
/*
2573 2574 2575 2576 2577 2578 2579 2580 2581
 * Provide an encryption key for @bs.
 * If @key is non-null:
 *     If @bs is not encrypted, fail.
 *     Else if the key is invalid, fail.
 *     Else set @bs's key to @key, replacing the existing key, if any.
 * If @key is null:
 *     If @bs is encrypted and still lacks a key, fail.
 *     Else do nothing.
 * On failure, store an error object through @errp if non-null.
2582
 */
2583
void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2584
{
2585 2586 2587 2588 2589
    if (key) {
        if (!bdrv_is_encrypted(bs)) {
            error_setg(errp, "Node '%s' is not encrypted",
                      bdrv_get_device_or_node_name(bs));
        } else if (bdrv_set_key(bs, key) < 0) {
2590
            error_setg(errp, QERR_INVALID_PASSWORD);
2591 2592 2593
        }
    } else {
        if (bdrv_key_required(bs)) {
2594 2595
            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
                      "'%s' (%s) is encrypted",
2596
                      bdrv_get_device_or_node_name(bs),
2597 2598 2599 2600 2601
                      bdrv_get_encrypted_filename(bs));
        }
    }
}

2602
/* Return the format name of the driver of @bs, or NULL if @bs has no
 * driver. */
const char *bdrv_get_format_name(BlockDriverState *bs)
{
    if (!bs->drv) {
        return NULL;
    }
    return bs->drv->format_name;
}

2607
/*
 * qsort() comparison callback for an array of C strings.
 *
 * qsort() hands the callback pointers to the array elements, i.e. pointers
 * to 'const char *', so each argument is dereferenced once before the
 * strings are compared with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}

2612 2613
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
K
Kevin Wolf 已提交
2614
{
2615 2616 2617 2618
    BlockDriver *drv;
    int count = 0;
    int i;
    const char **formats = NULL;
K
Kevin Wolf 已提交
2619

2620 2621 2622 2623 2624 2625 2626
    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        if (drv->format_name) {
            bool found = false;
            int i = count;
            while (formats && i && !found) {
                found = !strcmp(formats[--i], drv->format_name);
            }
2627

2628 2629 2630 2631
            if (!found) {
                formats = g_renew(const char *, formats, count + 1);
                formats[count++] = drv->format_name;
            }
2632
        }
2633
    }
2634

2635
    qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
K
Kevin Wolf 已提交
2636

2637 2638 2639
    for (i = 0; i < count; i++) {
        it(opaque, formats[i]);
    }
K
Kevin Wolf 已提交
2640

2641 2642
    g_free(formats);
}
K
Kevin Wolf 已提交
2643

2644 2645 2646 2647
/* This function is to find a node in the bs graph */
BlockDriverState *bdrv_find_node(const char *node_name)
{
    BlockDriverState *bs;
2648

2649
    assert(node_name);
K
Kevin Wolf 已提交
2650

2651 2652 2653
    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
        if (!strcmp(node_name, bs->node_name)) {
            return bs;
K
Kevin Wolf 已提交
2654 2655
        }
    }
2656
    return NULL;
K
Kevin Wolf 已提交
2657 2658
}

2659 2660
/*
 * Build a BlockDeviceInfoList with one entry per node in graph_bdrv_states.
 * This QMP function lives here so it can access the static
 * graph_bdrv_states.
 *
 * Every new entry is put at the head of the result, so the list is in the
 * reverse of the iteration order.  If bdrv_block_device_info() fails for a
 * node, the partial list is freed and NULL is returned.
 */
BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
{
    BlockDeviceInfoList *head = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
        BlockDeviceInfoList *item;
        BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);

        if (!info) {
            qapi_free_BlockDeviceInfoList(head);
            return NULL;
        }

        /* prepend the new item */
        item = g_malloc0(sizeof(*item));
        item->value = info;
        item->next = head;
        head = item;
    }

    return head;
}
K
Kevin Wolf 已提交
2680

2681 2682 2683 2684 2685 2686
/*
 * Find a BlockDriverState by device name and/or node name.
 *
 * If @device names an existing BlockBackend, that backend's BDS is
 * returned; if it has none, an error is set and NULL is returned without
 * looking at @node_name.  Otherwise @node_name, if given, is looked up with
 * bdrv_find_node().  If nothing is found, an error is set and NULL is
 * returned.
 */
BlockDriverState *bdrv_lookup_bs(const char *device,
                                 const char *node_name,
                                 Error **errp)
{
    BlockBackend *blk;
    BlockDriverState *bs;

    blk = device ? blk_by_name(device) : NULL;
    if (blk) {
        bs = blk_bs(blk);
        if (!bs) {
            error_setg(errp, "Device '%s' has no medium", device);
        }

        return bs;
    }

    bs = node_name ? bdrv_find_node(node_name) : NULL;
    if (bs) {
        return bs;
    }

    error_setg(errp, "Cannot find device=%s nor node_name=%s",
                     device ? device : "",
                     node_name ? node_name : "");
    return NULL;
}

2715 2716 2717
/* Return true if 'base' is 'top' itself or is reachable from 'top' by
 * following backing files, and false otherwise.  If either argument is
 * NULL, the result is false. */
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
{
    BlockDriverState *node;

    for (node = top; node != NULL; node = backing_bs(node)) {
        if (node == base) {
            return true;
        }
    }

    return false;
}

2726
/* Iterate over graph_bdrv_states: return the first entry if @bs is NULL,
 * otherwise the entry following @bs. */
BlockDriverState *bdrv_next_node(BlockDriverState *bs)
{
    return bs ? QTAILQ_NEXT(bs, node_list)
              : QTAILQ_FIRST(&graph_bdrv_states);
}

2734
/* Iterate over bdrv_states: return the first entry if @bs is NULL,
 * otherwise the entry following @bs. */
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    return bs ? QTAILQ_NEXT(bs, device_list)
              : QTAILQ_FIRST(&bdrv_states);
}
2741

2742
const char *bdrv_get_node_name(const BlockDriverState *bs)
B
bellard 已提交
2743
{
2744
    return bs->node_name;
2745 2746
}

2747 2748
/* TODO check what callers really want: bs->node_name or blk_name() */
const char *bdrv_get_device_name(const BlockDriverState *bs)
2749
{
2750
    return bs->blk ? blk_name(bs->blk) : "";
2751
}
B
bellard 已提交
2752

2753 2754 2755 2756 2757
/* This can be used to identify nodes that might not have a device
 * name associated. Since node and device names live in the same
 * namespace, the result is unambiguous. The exception is if both are
 * absent, then this returns an empty (non-null) string. */
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2758
{
2759
    return bs->blk ? blk_name(bs->blk) : bs->node_name;
2760 2761
}

2762
/* Return the open flags of @bs. */
int bdrv_get_flags(BlockDriverState *bs)
{
    int flags = bs->open_flags;

    return flags;
}

2767
/* Has the signature of a bdrv_has_zero_init callback and always
 * answers 1; @bs is not examined. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    enum { ALWAYS_ZERO_INIT = 1 };

    return ALWAYS_ZERO_INIT;
}

2772
/*
 * Return the answer of the driver's bdrv_has_zero_init() callback, or 0 if
 * @bs has a backing file or the driver has no such callback.
 *
 * @bs must have a driver (asserted).
 */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    /* A copy on write image starts out with the contents of its base
       image, and those need not be zeroes.  */
    if (!bs->backing && bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    /* safe default */
    return 0;
}

2789
/* Return the unallocated_blocks_are_zero field reported by bdrv_get_info().
 * The answer is false if @bs has a backing file or bdrv_get_info() fails. */
bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
{
    BlockDriverInfo info;

    /* bdrv_get_info() is only consulted for nodes without backing file */
    if (bs->backing || bdrv_get_info(bs, &info) != 0) {
        return false;
    }

    return info.unallocated_blocks_are_zero;
}

2804
/* Return the can_write_zeroes_with_unmap field reported by bdrv_get_info().
 * The answer is false if @bs has a backing file, was opened without
 * BDRV_O_UNMAP, or bdrv_get_info() fails. */
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
    BlockDriverInfo info;

    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP) ||
        bdrv_get_info(bs, &info) != 0) {
        return false;
    }

    return info.can_write_zeroes_with_unmap;
}

2819
/* Return bs->backing_file if the backing file of @bs is encrypted,
 * bs->filename if @bs itself is encrypted, and NULL otherwise. */
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing && bs->backing->bs->encrypted) {
        return bs->backing_file;
    }
    if (bs->encrypted) {
        return bs->filename;
    }
    return NULL;
}

2829 2830
/* Copy bs->backing_file into @filename, a buffer of @filename_size bytes,
 * using pstrcpy(). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    const char *backing_name = bs->backing_file;

    pstrcpy(filename, filename_size, backing_name);
}
K
Kevin Wolf 已提交
2834

2835 2836 2837 2838 2839 2840 2841 2842 2843 2844
/* Fill @bdi through the driver's bdrv_get_info() callback and return its
 * result.  Returns -ENOMEDIUM if @bs has no driver and -ENOTSUP if the
 * driver has no such callback; @bdi is left untouched in both cases. */
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_get_info) {
        return -ENOTSUP;
    }

    /* The driver gets a zeroed structure to fill in. */
    memset(bdi, 0, sizeof(*bdi));

    return drv->bdrv_get_info(bs, bdi);
}
2845

2846 2847 2848 2849 2850 2851 2852
/* Return the result of the driver's bdrv_get_specific_info() callback, or
 * NULL if @bs has no driver or the driver has no such callback. */
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_get_specific_info) {
        return NULL;
    }

    return drv->bdrv_get_specific_info(bs);
}

2855
/* Forward @event to the driver's bdrv_debug_event() callback.  Does nothing
 * if @bs is NULL, has no driver, or the driver has no such callback. */
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    if (bs && bs->drv && bs->drv->bdrv_debug_event) {
        bs->drv->bdrv_debug_event(bs, event);
    }
}

2864 2865
/* Starting at @bs and following bs->file, call bdrv_debug_breakpoint() on
 * the first node whose driver implements it and return its result.  The
 * walk stops at a NULL node or a node without a driver; -ENOTSUP is
 * returned in that case. */
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    for (; bs && bs->drv; bs = bs->file ? bs->file->bs : NULL) {
        if (bs->drv->bdrv_debug_breakpoint) {
            return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
        }
    }

    return -ENOTSUP;
}

2878
/* Starting at @bs and following bs->file, call
 * bdrv_debug_remove_breakpoint() on the first node whose driver implements
 * it and return its result.  The walk stops at a NULL node or a node
 * without a driver; -ENOTSUP is returned in that case. */
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
    for (; bs && bs->drv; bs = bs->file ? bs->file->bs : NULL) {
        if (bs->drv->bdrv_debug_remove_breakpoint) {
            return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
        }
    }

    return -ENOTSUP;
}

2891
/* Starting at @bs and following bs->file, call bdrv_debug_resume() on the
 * first node whose driver implements it and return its result.  Nodes
 * without a driver are skipped rather than ending the walk.  Returns
 * -ENOTSUP if the chain ends without a match. */
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
    for (; bs; bs = bs->file ? bs->file->bs : NULL) {
        if (bs->drv && bs->drv->bdrv_debug_resume) {
            return bs->drv->bdrv_debug_resume(bs, tag);
        }
    }

    return -ENOTSUP;
}

2904
/* Starting at @bs and following bs->file, call bdrv_debug_is_suspended() on
 * the first node whose driver implements it and return its result.  The
 * walk stops at a NULL node or a node without a driver; false is returned
 * in that case. */
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
    for (; bs && bs->drv; bs = bs->file ? bs->file->bs : NULL) {
        if (bs->drv->bdrv_debug_is_suspended) {
            return bs->drv->bdrv_debug_is_suspended(bs, tag);
        }
    }

    return false;
}
2916

2917
/* Return 1 if BDRV_O_SNAPSHOT is set in the open flags of @bs, else 0. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return (bs->open_flags & BDRV_O_SNAPSHOT) != 0;
}

2922 2923 2924 2925 2926 2927
/* @backing_file can be relative, absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So bs->filename of a BDS
 * should not be passed in as @backing_file, as that may be relative to the
 * CWD rather than to the chain.
 *
 * Returns the node in the backing chain of @bs whose parent refers to it by
 * a backing file name matching @backing_file, or NULL if there is none (or
 * if @bs is NULL, has no driver, or @backing_file is NULL). */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *wanted_real;      /* resolved path for @backing_file */
    char *candidate_real;   /* resolved path for a node's backing_file */
    char *combined;         /* scratch buffer for path_combine() */
    bool wanted_is_protocol;
    BlockDriverState *node;
    BlockDriverState *found = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    wanted_real    = g_malloc(PATH_MAX);
    candidate_real = g_malloc(PATH_MAX);
    combined       = g_malloc(PATH_MAX);

    wanted_is_protocol = path_has_protocol(backing_file);

    for (node = bs; node->backing; node = node->backing->bs) {
        /* If either name is actually a protocol, compare the names
         * unmodified. */
        if (wanted_is_protocol || path_has_protocol(node->backing_file)) {
            if (strcmp(backing_file, node->backing_file) == 0) {
                found = node->backing->bs;
                break;
            }
            continue;
        }

        /* Otherwise combine both names with the current image's filename
         * and compare the results after realpath(); a node for which
         * realpath() fails is skipped. */
        path_combine(combined, PATH_MAX, node->filename, backing_file);
        if (!realpath(combined, wanted_real)) {
            continue;
        }

        path_combine(combined, PATH_MAX, node->filename, node->backing_file);
        if (!realpath(combined, candidate_real)) {
            continue;
        }

        if (strcmp(candidate_real, wanted_real) == 0) {
            found = node->backing->bs;
            break;
        }
    }

    g_free(wanted_real);
    g_free(candidate_real);
    g_free(combined);

    return found;
}

/* Count the backing links below @bs.  Counting stops at the first node that
 * has no driver or no backing file. */
int bdrv_get_backing_file_depth(BlockDriverState *bs)
{
    int depth = 0;

    while (bs->drv && bs->backing) {
        depth++;
        bs = bs->backing->bs;
    }

    return depth;
}
P
Paolo Bonzini 已提交
3000

3001 3002 3003 3004
/* Run the module init functions registered for MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
3005

3006 3007 3008 3009
/* Set use_bdrv_whitelist, then perform the regular bdrv_init(). */
void bdrv_init_with_whitelist(void)
{
    /* The flag is set before any block module gets initialized. */
    use_bdrv_whitelist = 1;

    bdrv_init();
}

3012
/* If @bs has BDRV_O_INCOMING set, clear the flag, invalidate caches through
 * the driver callback (or, without one, recursively through bs->file) and
 * refresh the total sector count.  Does nothing if @bs has no driver or the
 * flag is not set.  Failures are reported through @errp. */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *err = NULL;
    int rc;

    if (!drv || !(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    if (drv->bdrv_invalidate_cache) {
        drv->bdrv_invalidate_cache(bs, &err);
    } else if (bs->file) {
        bdrv_invalidate_cache(bs->file->bs, &err);
    }
    if (err) {
        error_propagate(errp, err);
        return;
    }

    rc = refresh_total_sectors(bs, bs->total_sectors);
    if (rc < 0) {
        error_setg_errno(errp, -rc, "Could not refresh total sector count");
    }
}

3043
/* Call bdrv_invalidate_cache() on every entry of bdrv_states, holding the
 * entry's AioContext around each call.  Stops at the first error, which is
 * propagated to @errp. */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *ctx = bdrv_get_aio_context(bs);

        aio_context_acquire(ctx);
        bdrv_invalidate_cache(bs, &err);
        aio_context_release(ctx);

        if (err) {
            error_propagate(errp, err);
            return;
        }
    }
}

B
bellard 已提交
3061 3062 3063 3064 3065 3066
/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
3067
bool bdrv_is_inserted(BlockDriverState *bs)
B
bellard 已提交
3068 3069
{
    BlockDriver *drv = bs->drv;
3070
    BdrvChild *child;
3071

3072 3073 3074
    if (!drv) {
        return false;
    }
3075 3076 3077 3078 3079 3080 3081
    if (drv->bdrv_is_inserted) {
        return drv->bdrv_is_inserted(bs);
    }
    QLIST_FOREACH(child, &bs->children, next) {
        if (!bdrv_is_inserted(child->bs)) {
            return false;
        }
3082
    }
3083
    return true;
B
bellard 已提交
3084 3085 3086
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_media_changed) {
        return -ENOTSUP;
    }

    return drv->bdrv_media_changed(bs);
}

/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 */
3103
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
B
bellard 已提交
3104 3105
{
    BlockDriver *drv = bs->drv;
3106
    const char *device_name;
B
bellard 已提交
3107

3108 3109
    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
B
bellard 已提交
3110
    }
3111

3112 3113 3114
    device_name = bdrv_get_device_name(bs);
    if (device_name[0] != '\0') {
        qapi_event_send_device_tray_moved(device_name,
3115
                                          eject_flag, &error_abort);
3116
    }
B
bellard 已提交
3117 3118 3119 3120 3121 3122
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
3123
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
B
bellard 已提交
3124 3125 3126
{
    BlockDriver *drv = bs->drv;

3127
    trace_bdrv_lock_medium(bs, locked);
S
Stefan Hajnoczi 已提交
3128

3129 3130
    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
B
bellard 已提交
3131 3132
    }
}
3133

3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146
/* Return the dirty bitmap of @bs whose name equals @name, or NULL if there
 * is none.  Bitmaps without a name never match.  @name must not be NULL. */
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
{
    BdrvDirtyBitmap *candidate;

    assert(name);

    QLIST_FOREACH(candidate, &bs->dirty_bitmaps, list) {
        if (!candidate->name) {
            continue;
        }
        if (strcmp(name, candidate->name) == 0) {
            return candidate;
        }
    }

    return NULL;
}

3147
/* Make @bitmap anonymous: free its name and set the name pointer to NULL.
 * The bitmap must not be frozen. */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    char *old_name;

    assert(!bdrv_dirty_bitmap_frozen(bitmap));

    old_name = bitmap->name;
    bitmap->name = NULL;
    g_free(old_name);
}

BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3155
                                          uint32_t granularity,
3156
                                          const char *name,
3157
                                          Error **errp)
3158 3159
{
    int64_t bitmap_size;
F
Fam Zheng 已提交
3160
    BdrvDirtyBitmap *bitmap;
3161
    uint32_t sector_granularity;
3162

3163 3164
    assert((granularity & (granularity - 1)) == 0);

3165 3166 3167 3168
    if (name && bdrv_find_dirty_bitmap(bs, name)) {
        error_setg(errp, "Bitmap already exists: %s", name);
        return NULL;
    }
3169 3170
    sector_granularity = granularity >> BDRV_SECTOR_BITS;
    assert(sector_granularity);
3171
    bitmap_size = bdrv_nb_sectors(bs);
3172 3173 3174 3175 3176
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        errno = -bitmap_size;
        return NULL;
    }
3177
    bitmap = g_new0(BdrvDirtyBitmap, 1);
3178
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
J
John Snow 已提交
3179
    bitmap->size = bitmap_size;
3180
    bitmap->name = g_strdup(name);
J
John Snow 已提交
3181
    bitmap->disabled = false;
F
Fam Zheng 已提交
3182 3183 3184 3185
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}

J
John Snow 已提交
3186 3187 3188 3189 3190
/* A bitmap counts as frozen while it has a successor. */
bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
{
    return bitmap->successor != NULL;
}

J
John Snow 已提交
3191 3192
/* A bitmap is enabled when it is neither disabled nor has a successor. */
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
    return !bitmap->disabled && !bitmap->successor;
}

J
John Snow 已提交
3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206
/* Map the state of @bitmap to a DirtyBitmapStatus: frozen takes precedence
 * over disabled; anything else is active. */
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
    if (bdrv_dirty_bitmap_frozen(bitmap)) {
        return DIRTY_BITMAP_STATUS_FROZEN;
    }
    if (!bdrv_dirty_bitmap_enabled(bitmap)) {
        return DIRTY_BITMAP_STATUS_DISABLED;
    }

    return DIRTY_BITMAP_STATUS_ACTIVE;
}

J
John Snow 已提交
3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288
/**
 * Create a successor bitmap destined to replace this bitmap after an operation.
 * Requires that the bitmap is not frozen and has no successor.
 *
 * Returns 0 on success, or -1 with @errp set on failure.
 */
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
                                       BdrvDirtyBitmap *bitmap, Error **errp)
{
    BdrvDirtyBitmap *successor;

    if (bdrv_dirty_bitmap_frozen(bitmap)) {
        error_setg(errp, "Cannot create a successor for a bitmap that is "
                   "currently frozen");
        return -1;
    }
    assert(!bitmap->successor);

    /* The successor is anonymous and shares the parent's granularity */
    successor = bdrv_create_dirty_bitmap(bs,
                                         bdrv_dirty_bitmap_granularity(bitmap),
                                         NULL, errp);
    if (!successor) {
        return -1;
    }

    /* The successor inherits the parent's disabled flag */
    successor->disabled = bitmap->disabled;

    /* Installing the successor is what freezes the parent */
    bitmap->successor = successor;

    return 0;
}

/**
 * For a bitmap with a successor, yield our name to the successor,
 * delete the old bitmap, and return a handle to the new bitmap.
 *
 * Returns NULL with @errp set if @bitmap has no successor.
 */
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
                                            BdrvDirtyBitmap *bitmap,
                                            Error **errp)
{
    BdrvDirtyBitmap *heir = bitmap->successor;

    if (heir == NULL) {
        error_setg(errp, "Cannot relinquish control if "
                   "there's no successor present");
        return NULL;
    }

    /* Hand the name over instead of copying it */
    heir->name = bitmap->name;
    bitmap->name = NULL;

    /* Detach the successor first so the old bitmap is no longer frozen
     * when it gets released */
    bitmap->successor = NULL;
    bdrv_release_dirty_bitmap(bs, bitmap);

    return heir;
}

/**
 * In cases of failure where we can no longer safely delete the parent,
 * we may wish to re-join the parent and child/successor.
 * The merged parent will be un-frozen, but not explicitly re-enabled.
 *
 * Returns @parent on success, or NULL with @errp set if there is no
 * successor or the merge fails.
 */
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
                                           BdrvDirtyBitmap *parent,
                                           Error **errp)
{
    BdrvDirtyBitmap *child = parent->successor;

    if (!child) {
        error_setg(errp, "Cannot reclaim a successor when none is present");
        return NULL;
    }

    if (!hbitmap_merge(parent->bitmap, child->bitmap)) {
        error_setg(errp, "Merging of parent and successor bitmap failed");
        return NULL;
    }

    bdrv_release_dirty_bitmap(bs, child);
    parent->successor = NULL;

    return parent;
}

3291 3292 3293 3294 3295 3296 3297 3298 3299
/**
 * Truncates _all_ bitmaps attached to a BDS to its current number of
 * sectors.  None of the bitmaps may be frozen.
 */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    /* NOTE(review): bdrv_create_dirty_bitmap() treats a negative
     * bdrv_nb_sectors() result as an error; here such a value would be
     * converted to a huge unsigned size -- confirm callers rule that out. */
    uint64_t nb_sectors = bdrv_nb_sectors(bs);

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        assert(!bdrv_dirty_bitmap_frozen(bm));
        hbitmap_truncate(bm->bitmap, nb_sectors);
        bm->size = nb_sectors;
    }
}

F
Fam Zheng 已提交
3306 3307 3308 3309 3310
/* Remove @bitmap from the dirty bitmap list of @bs and free it.  Does
 * nothing if @bitmap is not in that list.  The bitmap must not be frozen. */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    BdrvDirtyBitmap *cur, *next;

    QLIST_FOREACH_SAFE(cur, &bs->dirty_bitmaps, list, next) {
        if (cur != bitmap) {
            continue;
        }

        assert(!bdrv_dirty_bitmap_frozen(cur));
        QLIST_REMOVE(cur, list);
        hbitmap_free(cur->bitmap);
        g_free(cur->name);
        g_free(cur);
        return;
    }
}

J
John Snow 已提交
3321 3322
/* Set the disabled flag of @bitmap.  The bitmap must not be frozen. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));

    bitmap->disabled = true;
}

/* Clear the disabled flag of @bitmap.  The bitmap must not be frozen. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));

    bitmap->disabled = false;
}

F
Fam Zheng 已提交
3333 3334 3335 3336 3337 3338 3339
/* Build a newly allocated list with one BlockDirtyInfo per dirty bitmap of
 * @bs, in the order of bs->dirty_bitmaps.  Returns NULL if @bs has no dirty
 * bitmaps. */
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *head = NULL;
    BlockDirtyInfoList **tail = &head;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *node = g_new0(BlockDirtyInfoList, 1);

        info->count = bdrv_get_dirty_count(bm);
        info->granularity = bdrv_dirty_bitmap_granularity(bm);
        info->has_name = !!bm->name;
        info->name = g_strdup(bm->name);
        info->status = bdrv_dirty_bitmap_status(bm);

        /* Append at the tail so the list order is kept */
        node->value = info;
        *tail = node;
        tail = &node->next;
    }

    return head;
}

F
Fam Zheng 已提交
3355
/* Return the hbitmap_get() result for @sector in @bitmap, or 0 if @bitmap
 * is NULL.  @bs is not used. */
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
{
    return bitmap ? hbitmap_get(bitmap->bitmap, sector) : 0;
}

3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383
/**
 * Chooses a default granularity based on the existing cluster size,
 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
 * is no cluster size information available.
 */
uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    uint32_t granularity;

    /* No usable cluster size: fall back to 64K */
    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size <= 0) {
        return 65536;
    }

    granularity = MAX(4096, bdi.cluster_size);

    return MIN(65536, granularity);
}

3384 3385 3386 3387 3388
/* Return BDRV_SECTOR_SIZE shifted left by the hbitmap granularity of
 * @bitmap. */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    int shift = hbitmap_granularity(bitmap->bitmap);

    return BDRV_SECTOR_SIZE << shift;
}

3389
/* Initialize @hbi to iterate over @bitmap starting at offset 0. */
void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    const uint64_t first = 0;

    hbitmap_iter_init(hbi, bitmap->bitmap, first);
}

3394
/* Call hbitmap_set() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled. */
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));

    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}

3401
/* Call hbitmap_reset() on @bitmap for @nr_sectors sectors starting at
 * @cur_sector.  The bitmap must be enabled. */
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));

    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}

J
John Snow 已提交
3408 3409 3410
/* Call hbitmap_reset_all() on @bitmap.  The bitmap must be enabled. */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));

    hbitmap_reset_all(bitmap->bitmap);
}

3414 3415
/* Call hbitmap_set() for @nr_sectors sectors starting at @cur_sector on
 * every enabled dirty bitmap of @bs.  Bitmaps that are not enabled are
 * skipped. */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    BdrvDirtyBitmap *bm;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        if (bdrv_dirty_bitmap_enabled(bm)) {
            hbitmap_set(bm->bitmap, cur_sector, nr_sectors);
        }
    }
}

3426 3427 3428 3429 3430 3431 3432 3433 3434
/**
 * Advance an HBitmapIter to an arbitrary offset.
 *
 * The iterator is re-initialized on the bitmap it already refers to, which
 * must be set.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    assert(hbi->hb);

    hbitmap_iter_init(hbi, hbi->hb, offset);
}

3435
/* Return hbitmap_count() of the hbitmap backing @bitmap. */
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
{
    int64_t count = hbitmap_count(bitmap->bitmap);

    return count;
}
J
Jes Sorensen 已提交
3439

3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450
/* Get a reference to bs by incrementing its reference count */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt += 1;
}

/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted.  A NULL bs is ignored. */
void bdrv_unref(BlockDriverState *bs)
{
    if (!bs) {
        return;
    }

    assert(bs->refcnt > 0);

    bs->refcnt--;
    if (bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}

3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Why the operation is blocked; bdrv_op_unblock() matches entries by
     * comparing this pointer. */
    Error *reason;
    /* Linkage within bs->op_blockers[op] */
    QLIST_ENTRY(BdrvOpBlocker) list;
};

/* Return true if at least one blocker is registered for @op on @bs.  In
 * that case, and if @errp is not NULL, an error naming the node and quoting
 * the reason of the first blocker in the list is stored in @errp. */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *first;

    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    if (QLIST_EMPTY(&bs->op_blockers[op])) {
        return false;
    }

    if (errp) {
        first = QLIST_FIRST(&bs->op_blockers[op]);
        error_setg(errp, "Node '%s' is busy: %s",
                   bdrv_get_device_or_node_name(bs),
                   error_get_pretty(first->reason));
    }

    return true;
}

/* Register @reason as a blocker for @op on @bs.  Only the @reason pointer
 * is stored; it is not copied. */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *entry;

    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    entry = g_new0(BdrvOpBlocker, 1);
    entry->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], entry, list);
}

/* Remove and free every blocker for @op on @bs whose reason pointer equals
 * @reason.  The Error object itself is not freed. */
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *entry, *next_entry;

    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    QLIST_FOREACH_SAFE(entry, &bs->op_blockers[op], list, next_entry) {
        if (entry->reason != reason) {
            continue;
        }
        QLIST_REMOVE(entry, list);
        g_free(entry);
    }
}

/* Register @reason as a blocker for every operation type on @bs. */
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int op = 0;

    while (op < BLOCK_OP_TYPE_MAX) {
        bdrv_op_block(bs, op, reason);
        op++;
    }
}

/* Remove the blockers registered with @reason from every operation type on
 * @bs. */
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int op = 0;

    while (op < BLOCK_OP_TYPE_MAX) {
        bdrv_op_unblock(bs, op, reason);
        op++;
    }
}

/* Return true if no operation type on @bs has a blocker registered. */
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
    int op = 0;

    while (op < BLOCK_OP_TYPE_MAX) {
        if (!QLIST_EMPTY(&bs->op_blockers[op])) {
            return false;
        }
        op++;
    }

    return true;
}

3531 3532
/*
 * Create the image @filename in format @fmt.
 *
 * The creation options are the union of the format driver's and the
 * protocol driver's create options.  They are seeded with @img_size, then
 * overridden by the "-o" style string @options (may be NULL), then by
 * @base_filename and @base_fmt (each may be NULL).
 *
 * If the resulting size option reads back as -1, the size is taken from the
 * backing file, which is opened with @flags minus BDRV_O_RDWR,
 * BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING; without a backing file this is an
 * error.
 *
 * Unless @quiet is set, a "Formatting ..." line is printed to stdout before
 * the image is created.  Failures are reported through @errp.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *err = NULL;
    int ret = 0;

    /* Look up the format and protocol drivers; both must support image
     * creation */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &err);
        if (err) {
            /* The parser's own message is reported directly; the caller
             * gets a generic one */
            error_report_err(err);
            err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
        if (err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
        if (err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file && !strcmp(filename, backing_file)) {
        error_setg(errp, "Error: Trying to create an image with the "
                         "same filename as the backing file");
        goto out;
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1 && !backing_file) {
        error_setg(errp, "Image creation needs a size parameter");
        goto out;
    }
    if (size == -1) {
        BlockDriverState *bs = NULL;
        char *full_backing = g_new0(char, PATH_MAX);
        QDict *backing_options = NULL;
        int back_flags;

        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                     full_backing, PATH_MAX,
                                                     &err);
        if (err) {
            g_free(full_backing);
            goto out;
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        if (backing_fmt) {
            backing_options = qdict_new();
            qdict_put(backing_options, "driver",
                      qstring_from_str(backing_fmt));
        }

        ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                        back_flags, &err);
        g_free(full_backing);
        if (ret < 0) {
            goto out;
        }

        size = bdrv_getlength(bs);
        if (size < 0) {
            error_setg_errno(errp, -size, "Could not get size of '%s'",
                             backing_file);
            bdrv_unref(bs);
            goto out;
        }

        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

        bdrv_unref(bs);
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";

        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(err);
        err = NULL;
    }

out:
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (err) {
        error_propagate(errp, err);
    }
}
3697 3698 3699

/* Return the aio_context field of @bs. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    AioContext *ctx = bs->aio_context;

    return ctx;
}

/* Detach @bs from its AioContext.  In order: the registered notifiers are
 * called, the throttle timers are detached (if throttling is set up), the
 * driver callback runs, bs->file and bs->backing are detached recursively,
 * and finally bs->aio_context is set to NULL.  Does nothing if @bs has no
 * driver. */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BdrvAioNotifier *notifier;

    if (!drv) {
        return;
    }

    QLIST_FOREACH(notifier, &bs->aio_notifiers, list) {
        notifier->detach_aio_context(notifier->opaque);
    }

    if (bs->throttle_state) {
        throttle_timers_detach_aio_context(&bs->throttle_timers);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file->bs);
    }
    if (bs->backing) {
        bdrv_detach_aio_context(bs->backing->bs);
    }

    bs->aio_context = NULL;
}

/* Attach @bs to @new_context.  In order: bs->aio_context is set,
 * bs->backing and bs->file are attached recursively, the driver callback
 * runs, the throttle timers are attached (if throttling is set up), and
 * finally the registered notifiers are called.  Does nothing if @bs has no
 * driver. */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *notifier;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing) {
        bdrv_attach_aio_context(bs->backing->bs, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file->bs, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->throttle_state) {
        throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
    }

    QLIST_FOREACH(notifier, &bs->aio_notifiers, list) {
        notifier->attached_aio_context(new_context, notifier->opaque);
    }
}

/* Move @bs to @new_context: drain it, detach it from its current
 * AioContext and attach it to the new one while holding @new_context. */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    /* ensure there are no in-flight requests */
    bdrv_drain(bs);

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
3773

M
Max Reitz 已提交
3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810
/*
 * Register an AioContext notifier on @bs.
 *
 * A new BdrvAioNotifier holding @attached_aio_context, @detach_aio_context
 * and @opaque is allocated and inserted at the head of bs->aio_notifiers.
 * bdrv_attach_aio_context() calls @attached_aio_context with the new context
 * and @opaque for every entry of that list.
 *
 * The entry is released again by bdrv_remove_aio_context_notifier() when
 * called with the same three values.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    /* Zero-filled allocation: any field not set below stays zero */
    BdrvAioNotifier *notifier = g_new0(BdrvAioNotifier, 1);

    notifier->attached_aio_context = attached_aio_context;
    notifier->detach_aio_context = detach_aio_context;
    notifier->opaque = opaque;

    QLIST_INSERT_HEAD(&bs->aio_notifiers, notifier, list);
}

/*
 * Unregister an AioContext notifier from @bs.
 *
 * The first entry of bs->aio_notifiers whose two callbacks and opaque pointer
 * all equal the given ones is unlinked and freed.  If no entry matches, the
 * process is aborted.
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*attached_aio_context)(AioContext *,
                                                                   void *),
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
{
    BdrvAioNotifier *cur, *next;

    QLIST_FOREACH_SAFE(cur, &bs->aio_notifiers, list, next) {
        /* Skip entries that differ in any of the three identifying fields */
        if (cur->attached_aio_context != attached_aio_context ||
            cur->detach_aio_context != detach_aio_context ||
            cur->opaque != opaque) {
            continue;
        }

        QLIST_REMOVE(cur, list);
        g_free(cur);
        return;
    }

    /* No matching notifier was registered */
    abort();
}

3811 3812
/*
 * Ask the driver of @bs to amend its options according to @opts.
 *
 * @status_cb is handed to the driver hook unchanged.
 *
 * Returns -ENOMEDIUM if @bs has no driver, -ENOTSUP if the driver does not
 * implement bdrv_amend_options, and otherwise whatever the driver hook
 * returns.
 */
int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
                       BlockDriverAmendStatusCB *status_cb)
{
    /* A node may have no driver; do not dereference a NULL bs->drv */
    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (!bs->drv->bdrv_amend_options) {
        return -ENOTSUP;
    }
    return bs->drv->bdrv_amend_options(bs, opts, status_cb);
}
3819

3820 3821 3822 3823
/* This function will be called by the bdrv_recurse_is_first_non_filter method
 * of block filter and by bdrv_is_first_non_filter.
 * It is used to test if the given bs is the candidate or recurse more in the
 * node graph.
3824
 */
3825
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3826
                                      BlockDriverState *candidate)
3827
{
3828 3829
    /* return false if basic checks fails */
    if (!bs || !bs->drv) {
3830
        return false;
3831 3832
    }

3833 3834 3835 3836 3837
    /* the code reached a non block filter driver -> check if the bs is
     * the same as the candidate. It's the recursion termination condition.
     */
    if (!bs->drv->is_filter) {
        return bs == candidate;
3838
    }
3839
    /* Down this path the driver is a block filter driver */
3840

3841 3842 3843 3844
    /* If the block filter recursion method is defined use it to recurse down
     * the node graph.
     */
    if (bs->drv->bdrv_recurse_is_first_non_filter) {
3845
        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3846 3847
    }

3848 3849 3850
    /* the driver is a block filter but don't allow to recurse -> return false
     */
    return false;
3851 3852
}

3853 3854 3855 3856 3857
/* Checks whether @candidate is the first non-filter node down its chain.
 * Since there are no pointers to parents, every chain is explored from the
 * top by walking the bdrv_states list.  Some filters can choose not to pass
 * the recursion down.
 */
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
    BlockDriverState *top;

    /* Try each top-level node in turn and stop at the first match */
    QTAILQ_FOREACH(top, &bdrv_states, device_list) {
        if (bdrv_recurse_is_first_non_filter(top, candidate)) {
            return true;
        }
    }

    return false;
}
3876

3877 3878
/*
 * Look up the node called @node_name and check that it may be replaced.
 *
 * Returns the node if it exists, BLOCK_OP_TYPE_REPLACE is not blocked on it
 * and it is the first non-filter node reached from @parent_bs.  Returns NULL
 * otherwise; @errp is set here for the "not found" and "not top most non
 * filter" cases and is passed to bdrv_op_is_blocked() for the blocked case.
 *
 * The node's AioContext is held while the checks run.
 */
BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
                                        const char *node_name, Error **errp)
{
    BlockDriverState *target = bdrv_find_node(node_name);
    AioContext *ctx;

    if (!target) {
        error_setg(errp, "Node name '%s' not found", node_name);
        return NULL;
    }

    ctx = bdrv_get_aio_context(target);
    aio_context_acquire(ctx);

    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_REPLACE, errp)) {
        target = NULL;
    } else if (!bdrv_recurse_is_first_non_filter(parent_bs, target)) {
        /* Only the top most non filter of the chain may be replaced, never
         * an arbitrary node, in order to prevent data corruption.  As a side
         * benefit this test also excludes backing files, which are blocked
         * by the backing blockers.
         */
        error_setg(errp, "Only top most non filter can be replaced");
        target = NULL;
    }

    aio_context_release(ctx);
    return target;
}
3911

M
Max Reitz 已提交
3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957
/*
 * Copy into @d every entry of bs->options whose key contains no '.' and is
 * not "node-name".  A reference is taken on each value before it is put
 * into @d.
 *
 * Returns true if at least one entry was copied.
 */
static bool append_open_options(QDict *d, BlockDriverState *bs)
{
    const QDictEntry *e;
    bool copied = false;

    for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
        const char *key = qdict_entry_key(e);
        QObject *value;

        /* Only take options for this level (no dotted keys) and leave out
         * the non-driver-specific "node-name" */
        if (strchr(key, '.') || !strcmp(key, "node-name")) {
            continue;
        }

        value = qdict_entry_value(e);
        qobject_incref(value);
        qdict_put_obj(d, key, value);
        copied = true;
    }

    return copied;
}

/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 *
 * Nothing is done for a BDS without a driver.  If the BDS has a file child,
 * that child is refreshed first.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file->bs);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well.
         * Use the bounded pstrcpy(), as for bs->filename below, rather than
         * an unchecked strcpy() into the fixed-size buffer. */
        if (bs->file->bs->exact_filename[0] && !has_open_options) {
            pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
                    bs->file->bs->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->bs->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* The "file" entry holds its own reference to the child's dict */
            QINCREF(bs->file->bs->full_open_options);
            qdict_put_obj(opts, "file",
                          QOBJECT(bs->file->bs->full_open_options));

            bs->full_open_options = opts;
        } else {
            /* Without the child's options the dict is of no use; drop it */
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Derive the user-visible filename: the exact filename when there is one,
     * otherwise the JSON form of the full options */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}