/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef BLOCK_INT_H
#define BLOCK_INT_H

#include "block/accounting.h"
#include "block/block.h"
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
#include "qemu/stats64.h"
#include "qemu/timer.h"
#include "qemu/hbitmap.h"
#include "block/snapshot.h"
#include "qemu/main-loop.h"
#include "qemu/throttle.h"

/* Bit value of the "lazy refcounts" block flag. */
#define BLOCK_FLAG_LAZY_REFCOUNTS   8

/* Option-name strings (the keys used to look the options up by name). */
#define BLOCK_OPT_SIZE              "size"
#define BLOCK_OPT_ENCRYPT           "encryption"
#define BLOCK_OPT_ENCRYPT_FORMAT    "encrypt.format"
#define BLOCK_OPT_COMPAT6           "compat6"
#define BLOCK_OPT_HWVERSION         "hwversion"
#define BLOCK_OPT_BACKING_FILE      "backing_file"
#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
#define BLOCK_OPT_TABLE_SIZE        "table_size"
#define BLOCK_OPT_PREALLOC          "preallocation"
#define BLOCK_OPT_SUBFMT            "subformat"
#define BLOCK_OPT_COMPAT_LEVEL      "compat"
#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
#define BLOCK_OPT_REDUNDANCY        "redundancy"
#define BLOCK_OPT_NOCOW             "nocow"
#define BLOCK_OPT_OBJECT_SIZE       "object_size"
#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"
#define BLOCK_OPT_DATA_FILE         "data_file"
#define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"

/* NOTE(review): presumably the size in bytes of the buffer handed to
 * .bdrv_probe() -- confirm against the caller in block.c */
#define BLOCK_PROBE_BUF_SIZE        512

/* Kind of operation recorded in the 'type' field of a BdrvTrackedRequest. */
enum BdrvTrackedRequestType {
    BDRV_TRACKED_READ,
    BDRV_TRACKED_WRITE,
    BDRV_TRACKED_DISCARD,
    BDRV_TRACKED_TRUNCATE,
};

typedef struct BdrvTrackedRequest {
    BlockDriverState *bs;
73
    int64_t offset;
74
    uint64_t bytes;
75
    enum BdrvTrackedRequestType type;
76

77
    bool serialising;
78
    int64_t overlap_offset;
79
    uint64_t overlap_bytes;
80

81 82 83
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
84 85

    struct BdrvTrackedRequest *waiting_for;
86 87
} BdrvTrackedRequest;

B
bellard 已提交
88 89 90
struct BlockDriver {
    const char *format_name;
    int instance_size;
91

92 93 94 95 96
    /* set to true if the BlockDriver is a block filter. Block filters pass
     * certain callbacks that refer to data (see block.c) to their bs->file if
     * the driver doesn't implement them. Drivers that do not wish to forward
     * must implement them and return -ENOTSUP.
     */
97 98 99
    bool is_filter;
    /* for snapshots block filter like Quorum can implement the
     * following recursive callback.
100 101 102
     * It's purpose is to recurse on the filter children while calling
     * bdrv_recurse_is_first_non_filter on them.
     * For a sample implementation look in the future Quorum block filter.
103
     */
104 105
    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
                                             BlockDriverState *candidate);
106

B
bellard 已提交
107
    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
108
    int (*bdrv_probe_device)(const char *filename);
109 110 111

    /* Any driver implementing this callback is expected to be able to handle
     * NULL file names in its .bdrv_open() implementation */
112
    void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
113 114 115 116 117 118
    /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
     * this field set to true, except ones that are defined only by their
     * child's bs.
     * An example of the last type will be the quorum block driver.
     */
    bool bdrv_needs_filename;
119

120 121 122
    /* Set if a driver can support backing files */
    bool supports_backing;

123 124 125 126 127
    /* For handling image reopen for split or non-split files */
    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue, Error **errp);
    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
128
    void (*bdrv_join_options)(QDict *options, QDict *old_options);
129

M
Max Reitz 已提交
130 131
    int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp);
132 133

    /* Protocol drivers should implement this instead of bdrv_open */
M
Max Reitz 已提交
134 135
    int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp);
B
bellard 已提交
136
    void (*bdrv_close)(BlockDriverState *bs);
137
    int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
138
                                       Error **errp);
139 140 141
    int coroutine_fn (*bdrv_co_create_opts)(const char *filename,
                                            QemuOpts *opts,
                                            Error **errp);
142
    int (*bdrv_make_empty)(BlockDriverState *bs);
M
Max Reitz 已提交
143

144 145 146 147 148
    /*
     * Refreshes the bs->exact_filename field. If that is impossible,
     * bs->exact_filename has to be left empty.
     */
    void (*bdrv_refresh_filename)(BlockDriverState *bs);
M
Max Reitz 已提交
149

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
    /*
     * Gathers the open options for all children into @target.
     * A simple format driver (without backing file support) might
     * implement this function like this:
     *
     *     QINCREF(bs->file->bs->full_open_options);
     *     qdict_put(target, "file", bs->file->bs->full_open_options);
     *
     * If not specified, the generic implementation will simply put
     * all children's options under their respective name.
     *
     * @backing_overridden is true when bs->backing seems not to be
     * the child that would result from opening bs->backing_file.
     * Therefore, if it is true, the backing child's options should be
     * gathered; otherwise, there is no need since the backing child
     * is the one implied by the image header.
     *
     * Note that ideally this function would not be needed.  Every
     * block driver which implements it is probably doing something
     * shady regarding its runtime option structure.
     */
    void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
                                      bool backing_overridden);

M
Max Reitz 已提交
174 175 176 177 178 179 180
    /*
     * Returns an allocated string which is the directory name of this BDS: It
     * will be used to make relative filenames absolute by prepending this
     * function's return value to them.
     */
    char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);

B
bellard 已提交
181
    /* aio */
182 183 184 185 186 187
    BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
        BlockCompletionFunc *cb, void *opaque);
    BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
        BlockCompletionFunc *cb, void *opaque);
188
    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
189
        BlockCompletionFunc *cb, void *opaque);
190
    BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
191
        int64_t offset, int bytes,
192
        BlockCompletionFunc *cb, void *opaque);
B
bellard 已提交
193

K
Kevin Wolf 已提交
194 195
    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211

    /**
     * @offset: position in bytes to read at
     * @bytes: number of bytes to read
     * @qiov: the buffers to fill with read data
     * @flags: currently unused, always 0
     *
     * @offset and @bytes will be a multiple of 'request_alignment',
     * but the length of individual @qiov elements does not have to
     * be a multiple.
     *
     * @bytes will always equal the total size of @qiov, and will be
     * no larger than 'max_transfer'.
     *
     * The buffer in @qiov may point directly to guest memory.
     */
212 213
    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
K
Kevin Wolf 已提交
214
    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
215
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
    /**
     * @offset: position in bytes to write at
     * @bytes: number of bytes to write
     * @qiov: the buffers containing data to write
     * @flags: zero or more bits allowed by 'supported_write_flags'
     *
     * @offset and @bytes will be a multiple of 'request_alignment',
     * but the length of individual @qiov elements does not have to
     * be a multiple.
     *
     * @bytes will always equal the total size of @qiov, and will be
     * no larger than 'max_transfer'.
     *
     * The buffer in @qiov may point directly to guest memory.
     */
231 232
    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
233

234 235 236
    /*
     * Efficiently zero a region of the disk image.  Typically an image format
     * would use a compact metadata representation to implement this.  This
237 238
     * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
     * will be called instead.
239
     */
E
Eric Blake 已提交
240
    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
241
        int64_t offset, int bytes, BdrvRequestFlags flags);
242
    int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
243
        int64_t offset, int bytes);
244

245 246 247 248 249 250 251 252 253 254 255 256 257
    /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
     *
     * See the comment of bdrv_co_copy_range for the parameter and return value
     * semantics.
     */
    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
                                                BdrvChild *src,
                                                uint64_t offset,
                                                BdrvChild *dst,
                                                uint64_t dst_offset,
                                                uint64_t bytes,
258 259
                                                BdrvRequestFlags read_flags,
                                                BdrvRequestFlags write_flags);
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274

    /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
     * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
     * operation if @bs is the leaf and @src has the same BlockDriver.  Return
     * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
     *
     * See the comment of bdrv_co_copy_range for the parameter and return value
     * semantics.
     */
    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
                                              BdrvChild *src,
                                              uint64_t src_offset,
                                              BdrvChild *dst,
                                              uint64_t dst_offset,
                                              uint64_t bytes,
275 276
                                              BdrvRequestFlags read_flags,
                                              BdrvRequestFlags write_flags);
277

278
    /*
279 280
     * Building block for bdrv_block_status[_above] and
     * bdrv_is_allocated[_above].  The driver should answer only
281 282 283 284 285 286 287 288 289 290 291 292
     * according to the current layer, and should only need to set
     * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
     * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
     * layer, the result should be 0 (and not BDRV_BLOCK_ZERO).  See
     * block.h for the overall meaning of the bits.  As a hint, the
     * flag want_zero is true if the caller cares more about precise
     * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
     * overall allocation (favor larger *pnum, perhaps by reporting
     * _DATA instead of _ZERO).  The block layer guarantees input
     * clamped to bdrv_getlength() and aligned to request_alignment,
     * as well as non-NULL pnum, map, and file; in turn, the driver
     * must return an error or set pnum to an aligned non-zero value.
293
     */
294 295 296
    int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
        bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
        int64_t *map, BlockDriverState **file);
K
Kevin Wolf 已提交
297

298 299 300
    /*
     * Invalidate any cached meta-data.
     */
301 302
    void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
                                                  Error **errp);
303
    int (*bdrv_inactivate)(BlockDriverState *bs);
304

P
Pavel Dovgalyuk 已提交
305 306 307 308 309 310 311
    /*
     * Flushes all data for all layers by calling bdrv_co_flush for underlying
     * layers, if needed. This function is needed for deterministic
     * synchronization of the flush finishing callback.
     */
    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);

312 313
    /*
     * Flushes all data that was already written to the OS all the way down to
314
     * the disk (for example file-posix.c calls fsync()).
315 316 317
     */
    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);

K
Kevin Wolf 已提交
318 319 320 321 322 323 324
    /*
     * Flushes all internal caches to the OS. The data may still sit in a
     * writeback cache of the host OS, but it will survive a crash of the qemu
     * process.
     */
    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);

325 326 327 328 329 330
    /*
     * Drivers setting this field must be able to work with just a plain
     * filename with '<protocol_name>:' as a prefix, and no other options.
     * Options may be extracted from the filename by implementing
     * bdrv_parse_filename.
     */
B
bellard 已提交
331
    const char *protocol_name;
332 333
    int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
                                         PreallocMode prealloc, Error **errp);
334

B
bellard 已提交
335
    int64_t (*bdrv_getlength)(BlockDriverState *bs);
336
    bool has_variable_length;
337
    int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
S
Stefan Hajnoczi 已提交
338 339
    BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
                                      Error **errp);
340

341 342 343
    int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);

344
    int (*bdrv_snapshot_create)(BlockDriverState *bs,
B
bellard 已提交
345
                                QEMUSnapshotInfo *sn_info);
346
    int (*bdrv_snapshot_goto)(BlockDriverState *bs,
B
bellard 已提交
347
                              const char *snapshot_id);
348 349 350 351
    int (*bdrv_snapshot_delete)(BlockDriverState *bs,
                                const char *snapshot_id,
                                const char *name,
                                Error **errp);
352
    int (*bdrv_snapshot_list)(BlockDriverState *bs,
B
bellard 已提交
353
                              QEMUSnapshotInfo **psn_info);
E
edison 已提交
354
    int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
355 356 357
                                  const char *snapshot_id,
                                  const char *name,
                                  Error **errp);
B
bellard 已提交
358
    int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
359 360
    ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
                                                 Error **errp);
B
bellard 已提交
361

362 363 364 365 366 367
    int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
                                          QEMUIOVector *qiov,
                                          int64_t pos);
    int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
                                          QEMUIOVector *qiov,
                                          int64_t pos);
368

K
Kevin Wolf 已提交
369 370 371
    int (*bdrv_change_backing_file)(BlockDriverState *bs,
        const char *backing_file, const char *backing_fmt);

B
bellard 已提交
372
    /* removable device specific */
373
    bool (*bdrv_is_inserted)(BlockDriverState *bs);
374
    void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
375
    void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
376

377
    /* to control generic scsi devices */
378
    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
379
        unsigned long int req, void *buf,
380
        BlockCompletionFunc *cb, void *opaque);
381 382
    int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
                                      unsigned long int req, void *buf);
383

384
    /* List of options for creating images, terminated by name == NULL */
385
    QemuOptsList *create_opts;
386 387 388 389 390 391 392 393
    /*
     * If this driver supports reopening images this contains a
     * NULL-terminated list of the runtime options that can be
     * modified. If an option in this list is unspecified during
     * reopen then it _must_ be reset to its default value or return
     * an error.
     */
    const char *const *mutable_opts;
394

395 396 397 398
    /*
     * Returns 0 for completed check, -errno for internal errors.
     * The check results are stored in result.
     */
399 400 401
    int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
                                      BdrvCheckResult *result,
                                      BdrvCheckMode fix);
A
aliguori 已提交
402

403
    int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
404
                              BlockDriverAmendStatusCB *status_cb,
405 406
                              void *cb_opaque,
                              Error **errp);
M
Max Reitz 已提交
407

408
    void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
K
Kevin Wolf 已提交
409

K
Kevin Wolf 已提交
410 411 412
    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
        const char *tag);
F
Fam Zheng 已提交
413 414
    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
        const char *tag);
K
Kevin Wolf 已提交
415 416 417
    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);

418
    void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
419

K
Kevin Wolf 已提交
420 421 422 423 424
    /*
     * Returns 1 if newly created images are guaranteed to contain only
     * zeros, 0 otherwise.
     */
    int (*bdrv_has_zero_init)(BlockDriverState *bs);
425

426 427 428 429 430 431 432 433 434 435 436 437 438
    /* Remove fd handlers, timers, and other event loop callbacks so the event
     * loop is no longer in use.  Called with no in-flight requests and in
     * depth-first traversal order with parents before child nodes.
     */
    void (*bdrv_detach_aio_context)(BlockDriverState *bs);

    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
     * can be processed again.  Called with no in-flight requests and in
     * depth-first traversal order with child nodes before parent nodes.
     */
    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
                                    AioContext *new_context);

439 440 441 442
    /* io queue for linux-aio */
    void (*bdrv_io_plug)(BlockDriverState *bs);
    void (*bdrv_io_unplug)(BlockDriverState *bs);

443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
    /**
     * Try to get @bs's logical and physical block size.
     * On success, store them in @bsz and return zero.
     * On failure, return negative errno.
     */
    int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
    /**
     * Try to get @bs's geometry (cyls, heads, sectors)
     * On success, store them in @geo and return 0.
     * On failure return -errno.
     * Only drivers that want to override guest geometry implement this
     * callback; see hd_geometry_guess().
     */
    int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);

458
    /**
459
     * bdrv_co_drain_begin is called if implemented in the beginning of a
460 461 462 463 464 465 466
     * drain operation to drain and stop any internal sources of requests in
     * the driver.
     * bdrv_co_drain_end is called if implemented at the end of the drain.
     *
     * They should be used by the driver to e.g. manage scheduled I/O
     * requests, or toggle an internal state. After the end of the drain new
     * requests will continue normally.
467
     */
468
    void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
469
    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
470

471 472 473 474 475
    void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
                           Error **errp);
    void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
                           Error **errp);

476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
    /**
     * Informs the block driver that a permission change is intended. The
     * driver checks whether the change is permissible and may take other
     * preparations for the change (e.g. get file system locks). This operation
     * is always followed either by a call to either .bdrv_set_perm or
     * .bdrv_abort_perm_update.
     *
     * Checks whether the requested set of cumulative permissions in @perm
     * can be granted for accessing @bs and whether no other users are using
     * permissions other than those given in @shared (both arguments take
     * BLK_PERM_* bitmasks).
     *
     * If both conditions are met, 0 is returned. Otherwise, -errno is returned
     * and errp is set to an error describing the conflict.
     */
    int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
                           uint64_t shared, Error **errp);

    /**
     * Called to inform the driver that the set of cumulative set of used
     * permissions for @bs has changed to @perm, and the set of sharable
     * permission to @shared. The driver can use this to propagate changes to
     * its children (i.e. request permissions only if a parent actually needs
     * them).
     *
     * This function is only invoked after bdrv_check_perm(), so block drivers
     * may rely on preparations made in their .bdrv_check_perm implementation.
     */
    void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);

    /*
     * Called to inform the driver that after a previous bdrv_check_perm()
     * call, the permission update is not performed and any preparations made
     * for it (e.g. taken file locks) need to be undone.
     *
     * This function can be called even for nodes that never saw a
     * bdrv_check_perm() call. It is a no-op then.
     */
    void (*bdrv_abort_perm_update)(BlockDriverState *bs);

    /**
     * Returns in @nperm and @nshared the permissions that the driver for @bs
     * needs on its child @c, based on the cumulative permissions requested by
     * the parents in @parent_perm and @parent_shared.
     *
     * If @c is NULL, return the permissions for attaching a new child for the
     * given @role.
523 524 525 526
     *
     * If @reopen_queue is non-NULL, don't return the currently needed
     * permissions, but those that will be needed after applying the
     * @reopen_queue.
527 528 529
     */
     void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
                             const BdrvChildRole *role,
530
                             BlockReopenQueue *reopen_queue,
531 532 533
                             uint64_t parent_perm, uint64_t parent_shared,
                             uint64_t *nperm, uint64_t *nshared);

534 535 536 537 538 539
    /**
     * Bitmaps should be marked as 'IN_USE' in the image on reopening image
     * as rw. This handler should realize it. It also should unset readonly
     * field of BlockDirtyBitmap's in case of success.
     */
    int (*bdrv_reopen_bitmaps_rw)(BlockDriverState *bs, Error **errp);
540 541 542 543
    bool (*bdrv_can_store_new_dirty_bitmap)(BlockDriverState *bs,
                                            const char *name,
                                            uint32_t granularity,
                                            Error **errp);
544 545 546
    void (*bdrv_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
                                                const char *name,
                                                Error **errp);
547

F
Fam Zheng 已提交
548 549 550 551 552 553 554 555 556
    /**
     * Register/unregister a buffer for I/O. For example, when the driver is
     * interested to know the memory areas that will later be used in iovs, so
     * that it can do IOMMU mapping with VFIO etc., in order to get better
     * performance. In the case of VFIO drivers, this callback is used to do
     * DMA mapping for hot buffers.
     */
    void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
    void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
557
    QLIST_ENTRY(BlockDriver) list;
558 559 560 561 562 563 564

    /* Pointer to a NULL-terminated array of names of strong options
     * that can be specified for bdrv_open(). A strong option is one
     * that changes the data of a BDS.
     * If this pointer is NULL, the array is considered empty.
     * "filename" and "driver" are always considered strong. */
    const char *const *strong_runtime_opts;
B
bellard 已提交
565 566
};

567
/*
 * I/O limits and alignment hints.  Unless stated otherwise, a value of 0
 * means "no limit" or "no preference", as described on each field.
 */
typedef struct BlockLimits {
    /* Alignment requirement, in bytes, for offset/length of I/O
     * requests. Must be a power of 2 less than INT_MAX; defaults to
     * 1 for drivers with modern byte interfaces, and to 512
     * otherwise. */
    uint32_t request_alignment;

    /* Maximum number of bytes that can be discarded at once (since it
     * is signed, it must be < 2G, if set). Must be multiple of
     * pdiscard_alignment, but need not be power of 2. May be 0 if no
     * inherent 32-bit limit */
    int32_t max_pdiscard;

    /* Optimal alignment for discard requests in bytes. A power of 2
     * is best but not mandatory.  Must be a multiple of
     * bl.request_alignment, and must be less than max_pdiscard if
     * that is set. May be 0 if bl.request_alignment is good enough */
    uint32_t pdiscard_alignment;

    /* Maximum number of bytes that can be zeroized at once (since it is
     * signed, it must be < 2G, if set). Must be multiple of
     * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
    int32_t max_pwrite_zeroes;

    /* Optimal alignment for write zeroes requests in bytes. A power
     * of 2 is best but not mandatory.  Must be a multiple of
     * bl.request_alignment, and must be less than max_pwrite_zeroes
     * if that is set. May be 0 if bl.request_alignment is good
     * enough */
    uint32_t pwrite_zeroes_alignment;

    /* Optimal transfer length in bytes.  A power of 2 is best but not
     * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
     * no preferred size */
    uint32_t opt_transfer;

    /* Maximal transfer length in bytes.  Need not be power of 2, but
     * must be multiple of opt_transfer and bl.request_alignment, or 0
     * for no 32-bit limit.  For now, anything larger than INT_MAX is
     * clamped down. */
    uint32_t max_transfer;

    /* memory alignment, in bytes so that no bounce buffer is needed */
    size_t min_mem_alignment;

    /* memory alignment, in bytes, for bounce buffer */
    size_t opt_mem_alignment;

    /* maximum number of iovec elements */
    int max_iov;
} BlockLimits;

/* Forward declaration; struct BdrvOpBlocker is not defined in this part of
 * the header, so it can only be used through pointers here. */
typedef struct BdrvOpBlocker BdrvOpBlocker;

typedef struct BdrvAioNotifier {
    void (*attached_aio_context)(AioContext *new_context, void *opaque);
    void (*detach_aio_context)(void *opaque);

    void *opaque;
626
    bool deleted;
M
Max Reitz 已提交
627 628 629 630

    QLIST_ENTRY(BdrvAioNotifier) list;
} BdrvAioNotifier;

631
struct BdrvChildRole {
632 633
    /* If true, bdrv_replace_node() doesn't change the node this BdrvChild
     * points to. */
634 635
    bool stay_at_node;

636 637 638 639 640 641
    /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
     * will return it. This information is used for drain_all, where every node
     * will be drained separately, so the drain only needs to be propagated to
     * non-BDS parents. */
    bool parent_is_bds;

642 643
    void (*inherit_options)(int *child_flags, QDict *child_options,
                            int parent_flags, QDict *parent_options);
644

645 646 647
    void (*change_media)(BdrvChild *child, bool load);
    void (*resize)(BdrvChild *child);

648 649 650
    /* Returns a name that is supposedly more useful for human users than the
     * node name for identifying the node in question (in particular, a BB
     * name), or NULL if the parent can't provide a better name. */
651
    const char *(*get_name)(BdrvChild *child);
652

653 654 655 656
    /* Returns a malloced string that describes the parent of the child for a
     * human reader. This could be a node-name, BlockBackend name, qdev ID or
     * QOM path of the device owning the BlockBackend, job type and ID etc. The
     * caller is responsible for freeing the memory. */
657
    char *(*get_parent_desc)(BdrvChild *child);
658

659 660 661 662 663
    /*
     * If this pair of functions is implemented, the parent doesn't issue new
     * requests after returning from .drained_begin() until .drained_end() is
     * called.
     *
664 665 666
     * These functions must not change the graph (and therefore also must not
     * call aio_poll(), which could change the graph indirectly).
     *
667 668 669 670 671
     * Note that this can be nested. If drained_begin() was called twice, new
     * I/O is allowed only after drained_end() was called twice, too.
     */
    void (*drained_begin)(BdrvChild *child);
    void (*drained_end)(BdrvChild *child);
672

673 674 675 676 677 678 679
    /*
     * Returns whether the parent has pending requests for the child. This
     * callback is polled after .drained_begin() has been called until all
     * activity on the child has stopped.
     */
    bool (*drained_poll)(BdrvChild *child);

680 681 682
    /* Notifies the parent that the child has been activated/inactivated (e.g.
     * when migration is completing) and it can start/stop requesting
     * permissions and doing I/O on it. */
683
    void (*activate)(BdrvChild *child, Error **errp);
684
    int (*inactivate)(BdrvChild *child);
685

686 687
    void (*attach)(BdrvChild *child);
    void (*detach)(BdrvChild *child);
688 689 690 691 692 693

    /* Notifies the parent that the filename of its child has changed (e.g.
     * because the direct child was removed from the backing chain), so that it
     * can update its reference. */
    int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
                           const char *filename, Error **errp);
694 695 696

    bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
                            GSList **ignore, Error **errp);
697
    void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
698 699 700 701
};

extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;
K
Kevin Wolf 已提交
702
extern const BdrvChildRole child_backing;
703

K
Kevin Wolf 已提交
704
struct BdrvChild {
705
    BlockDriverState *bs;
706
    char *name;
707
    const BdrvChildRole *role;
K
Kevin Wolf 已提交
708
    void *opaque;
709 710 711 712 713 714 715 716 717 718 719 720

    /**
     * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
     */
    uint64_t perm;

    /**
     * Permissions that can still be granted to other users of @bs while this
     * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
     */
    uint64_t shared_perm;

721 722 723 724 725
    /* backup of permissions during permission update procedure */
    bool has_backup_perm;
    uint64_t backup_perm;
    uint64_t backup_shared_perm;

726 727 728 729 730 731
    /*
     * This link is frozen: the child can neither be replaced nor
     * detached from the parent.
     */
    bool frozen;

732
    QLIST_ENTRY(BdrvChild) next;
K
Kevin Wolf 已提交
733
    QLIST_ENTRY(BdrvChild) next_parent;
K
Kevin Wolf 已提交
734
};
735

736 737 738 739 740 741
/*
 * Note: the function bdrv_append() copies and swaps contents of
 * BlockDriverStates, so if you add new fields to this struct, please
 * inspect bdrv_append() to determine if the new fields need to be
 * copied as well.
 */
B
bellard 已提交
742
struct BlockDriverState {
743 744 745
    /* Protected by big QEMU lock or read-only after opening.  No special
     * locking needed during I/O...
     */
746
    int open_flags; /* flags used to open the file, re-used for re-open */
747 748 749 750
    bool read_only; /* if true, the media is read only */
    bool encrypted; /* if true, the media is encrypted */
    bool sg;        /* if true, the device is a /dev/sg* */
    bool probed;    /* if true, format was probed rather than specified */
751
    bool force_share; /* if true, always allow all shared permissions */
752
    bool implicit;  /* if true, this filter node was automatically inserted */
753

B
bellard 已提交
754
    BlockDriver *drv; /* NULL means no media */
B
bellard 已提交
755 756
    void *opaque;

757
    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
M
Max Reitz 已提交
758 759 760 761
    /* long-running tasks intended to always use the same AioContext as this
     * BDS may register themselves in this list to be notified of changes
     * regarding this BDS's context */
    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
762
    bool walking_aio_notifiers; /* to make removal during iteration safe */
763

764 765 766
    char filename[PATH_MAX];
    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
                                    this file image */
M
Max Reitz 已提交
767 768 769 770
    /* The backing filename indicated by the image header; if we ever
     * open this file, then this is replaced by the resulting BDS's
     * filename (i.e. after a bdrv_refresh_filename() run). */
    char auto_backing_file[PATH_MAX];
771
    char backing_format[16]; /* if non-zero and backing_file exists */
B
bellard 已提交
772

M
Max Reitz 已提交
773
    QDict *full_open_options;
774
    char exact_filename[PATH_MAX];
M
Max Reitz 已提交
775

776
    BdrvChild *backing;
K
Kevin Wolf 已提交
777
    BdrvChild *file;
778

779 780 781
    /* I/O Limits */
    BlockLimits bl;

782 783 784 785 786 787 788 789 790 791 792 793 794 795 796
    /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
     * BDRV_REQ_WRITE_UNCHANGED).
     * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
     * writes will be issued as normal writes without the flag set.
     * This is important to note for drivers that do not explicitly
     * request a WRITE permission for their children and instead take
     * the same permissions as their parent did (this is commonly what
     * block filters do).  Such drivers have to be aware that the
     * parent may have taken a WRITE_UNCHANGED permission only and is
     * issuing such requests.  Drivers either must make sure that
     * these requests do not result in plain WRITE accesses (usually
     * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
     * every incoming write request as-is, including potentially that
     * flag), or they have to explicitly take the WRITE permission for
     * their children. */
797
    unsigned int supported_write_flags;
E
Eric Blake 已提交
798
    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
799
     * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
800
    unsigned int supported_zero_flags;
801

802 803 804 805
    /* the following member gives a name to every node on the bs graph. */
    char node_name[32];
    /* element of the list of named nodes building the graph */
    QTAILQ_ENTRY(BlockDriverState) node_list;
806 807
    /* element of the list of all BlockDriverStates (all_bdrv_states) */
    QTAILQ_ENTRY(BlockDriverState) bs_list;
808 809
    /* element of the list of monitor-owned BDS */
    QTAILQ_ENTRY(BlockDriverState) monitor_list;
810
    int refcnt;
S
Stefan Hajnoczi 已提交
811

812 813 814
    /* operation blockers */
    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];

815 816 817 818
    /* The node that this node inherited default options from (and a reopen on
     * which can affect this node by changing these defaults). This is always a
     * parent node of this node. */
    BlockDriverState *inherits_from;
819
    QLIST_HEAD(, BdrvChild) children;
K
Kevin Wolf 已提交
820
    QLIST_HEAD(, BdrvChild) parents;
821

822
    QDict *options;
K
Kevin Wolf 已提交
823
    QDict *explicit_options;
824
    BlockdevDetectZeroesOptions detect_zeroes;
825 826 827

    /* The error object in use for blocking operations on backing_hd */
    Error *backing_blocker;
828

829 830 831
    /* Protected by AioContext lock */

    /* If we are reading a disk image, give its size in sectors.
832 833
     * Generally read-only; it is written to by load_snapshot and
     * save_snaphost, but the block layer is quiescent during those.
834 835 836 837 838 839
     */
    int64_t total_sectors;

    /* Callback before write request is processed */
    NotifierWithReturnList before_write_notifiers;

840 841 842
    /* threshold limit for writes, in bytes. "High water mark". */
    uint64_t write_threshold_offset;
    NotifierWithReturn write_threshold_notifier;
843

844 845
    /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
     * Reading from the list can be done with either the BQL or the
846 847
     * dirty_bitmap_mutex.  Modifying a bitmap only requires
     * dirty_bitmap_mutex.  */
848
    QemuMutex dirty_bitmap_mutex;
849 850
    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;

851 852 853
    /* Offset after the highest byte written to */
    Stat64 wr_highest_offset;

854 855 856 857 858 859
    /* If true, copy read backing sectors into image.  Can be >1 if more
     * than one client has requested copy-on-read.  Accessed with atomic
     * ops.
     */
    int copy_on_read;

860 861 862 863 864 865
    /* number of in-flight requests; overall and serialising.
     * Accessed with atomic ops.
     */
    unsigned int in_flight;
    unsigned int serialising_in_flight;

866 867 868 869 870
    /* counter for nested bdrv_io_plug.
     * Accessed with atomic ops.
    */
    unsigned io_plugged;

871 872 873
    /* do we need to tell the quest if we have a volatile write cache? */
    int enable_write_cache;

874
    /* Accessed with atomic ops.  */
875
    int quiesce_counter;
876 877
    int recursive_quiesce_counter;

878
    unsigned int write_gen;               /* Current data generation */
879 880 881 882 883 884 885 886 887

    /* Protected by reqs_lock.  */
    CoMutex reqs_lock;
    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
    CoQueue flush_queue;                  /* Serializing flush queue */
    bool active_flush_req;                /* Flush request in flight? */

    /* Only read/written by whoever has set active_flush_req to true.  */
    unsigned int flushed_gen;             /* Flushed write generation */
B
bellard 已提交
888 889
};

M
Max Reitz 已提交
890 891 892 893 894 895
/*
 * Bundles an open-flags value, a read-only flag and a detect-zeroes setting.
 * NOTE(review): the three members have the same names and types as fields of
 * BlockDriverState; presumably this is a saved copy of the settings of a
 * BlockBackend's root node -- confirm against the users of this struct.
 */
struct BlockBackendRootState {
    int open_flags;
    bool read_only;
    BlockdevDetectZeroesOptions detect_zeroes;
};

M
Max Reitz 已提交
896 897 898 899 900 901 902 903 904 905 906 907 908 909
/*
 * Selects how the backing chain of a mirror target is established once the
 * job has completed (the @backing_mode argument of mirror_start()).
 */
typedef enum BlockMirrorBackingMode {
    /* Reuse the existing backing chain from the source for the target.
     * - sync=full: Set backing BDS to NULL.
     * - sync=top:  Use source's backing BDS.
     * - sync=none: Use source as the backing BDS. */
    MIRROR_SOURCE_BACKING_CHAIN,

    /* Open the target's backing chain completely anew */
    MIRROR_OPEN_BACKING_CHAIN,

    /* Do not change the target's backing BDS after job completion */
    MIRROR_LEAVE_BACKING_CHAIN,
} BlockMirrorBackingMode;

910 911 912 913 914
/*
 * Returns the node that @bs->backing points to, or NULL when @bs has no
 * backing child.
 */
static inline BlockDriverState *backing_bs(BlockDriverState *bs)
{
    if (!bs->backing) {
        return NULL;
    }

    return bs->backing->bs;
}

915 916 917 918 919 920 921

/* Essential block drivers which must always be statically linked into qemu, and
 * which therefore can be accessed without using bdrv_find_format() */
extern BlockDriver bdrv_file;
extern BlockDriver bdrv_raw;
extern BlockDriver bdrv_qcow2;

922
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
923 924
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
925
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
926 927
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
928

929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
/*
 * Flat-buffer front end for bdrv_co_preadv(): describes @buf / @bytes with a
 * local QEMUIOVector (built by QEMU_IOVEC_INIT_BUF) and forwards @child,
 * @offset, @bytes and @flags unchanged.
 *
 * Returns whatever bdrv_co_preadv() returns.
 */
static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
    int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
{
    QEMUIOVector buf_qiov = QEMU_IOVEC_INIT_BUF(buf_qiov, buf, bytes);
    int ret;

    ret = bdrv_co_preadv(child, offset, bytes, &buf_qiov, flags);
    return ret;
}

/*
 * Flat-buffer front end for bdrv_co_pwritev(): describes @buf / @bytes with a
 * local QEMUIOVector (built by QEMU_IOVEC_INIT_BUF) and forwards @child,
 * @offset, @bytes and @flags unchanged.
 *
 * Returns whatever bdrv_co_pwritev() returns.
 */
static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
    int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
{
    QEMUIOVector buf_qiov = QEMU_IOVEC_INIT_BUF(buf_qiov, buf, bytes);
    int ret;

    ret = bdrv_co_pwritev(child, offset, bytes, &buf_qiov, flags);
    return ret;
}

945
extern unsigned int bdrv_drain_all_count;
946 947 948
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);

949
int get_tmp_filename(char *filename, int size);
950 951
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename);
952

953 954 955
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
                                      QDict *options);

956

957 958 959 960 961 962 963 964 965
/**
 * bdrv_add_before_write_notifier:
 *
 * Register a callback that is invoked before write requests are processed but
 * after any throttling or waiting for overlapping requests.
 */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier);

M
Max Reitz 已提交
966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993
/**
 * bdrv_add_aio_context_notifier:
 *
 * If a long-running job intends to be always run in the same AioContext as a
 * certain BDS, it may use this function to be notified of changes regarding the
 * association of the BDS to an AioContext.
 *
 * attached_aio_context() is called after the target BDS has been attached to a
 * new AioContext; detach_aio_context() is called before the target BDS is being
 * detached from its old AioContext.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque);

/**
 * bdrv_remove_aio_context_notifier:
 *
 * Unsubscribe from change notifications regarding the BDS's AioContext. The
 * parameters given here have to be the same as those given to
 * bdrv_add_aio_context_notifier().
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*aio_context_attached)(AioContext *,
                                                                   void *),
                                      void (*aio_context_detached)(void *),
                                      void *opaque);

994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
/**
 * bdrv_wakeup:
 * @bs: The BlockDriverState for which an I/O operation has been completed.
 *
 * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
 * synchronous I/O on a BlockDriverState that is attached to another
 * I/O thread, the main thread lets the I/O thread's event loop run,
 * waiting for the I/O operation to complete.  A bdrv_wakeup will wake
 * up the main thread if necessary.
 *
 * Manual calls to bdrv_wakeup are rarely necessary, because
 * bdrv_dec_in_flight already calls it.
 */
void bdrv_wakeup(BlockDriverState *bs);

1009 1010 1011 1012
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif

P
Paolo Bonzini 已提交
1013 1014
/**
 * stream_start:
1015 1016
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
P
Paolo Bonzini 已提交
1017 1018 1019
 * @bs: Block device to operate on.
 * @base: Block device that will become the new base, or %NULL to
 * flatten the whole backing file chain onto @bs.
1020 1021
 * @backing_file_str: The file name that will be written to @bs as the
 * new backing file if the job completes. Ignored if @base is %NULL.
1022 1023
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
1024
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
P
Paolo Bonzini 已提交
1025
 * @on_error: The action to take upon error.
1026
 * @errp: Error object.
P
Paolo Bonzini 已提交
1027 1028 1029 1030 1031
 *
 * Start a streaming operation on @bs.  Clusters that are unallocated
 * in @bs, but allocated in any image between @base and @bs (both
 * exclusive) will be written to @bs.  At the end of a successful
 * streaming job, the backing file of @bs will be changed to
1032 1033
 * @backing_file_str in the written image and to @base in the live
 * BlockDriverState.
P
Paolo Bonzini 已提交
1034
 */
1035 1036
void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
1037 1038
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, Error **errp);
1039

1040 1041
/**
 * commit_start:
1042 1043
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
F
Fam Zheng 已提交
1044 1045 1046
 * @bs: Active block device.
 * @top: Top block device to be committed.
 * @base: Block device that will be written into, and become the new top.
1047 1048
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
1049 1050
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
1051
 * @backing_file_str: String to use as the backing file in @top's overlay
1052 1053 1054
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the commit job inserts into the graph above @top. NULL means
 * that a node name should be autogenerated.
1055 1056 1057
 * @errp: Error object.
 *
 */
1058
void commit_start(const char *job_id, BlockDriverState *bs,
1059 1060
                  BlockDriverState *base, BlockDriverState *top,
                  int creation_flags, int64_t speed,
1061
                  BlockdevOnError on_error, const char *backing_file_str,
1062
                  const char *filter_node_name, Error **errp);
F
Fam Zheng 已提交
1063 1064
/**
 * commit_active_start:
1065 1066
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
F
Fam Zheng 已提交
1067 1068
 * @bs: Active block device to be committed.
 * @base: Block device that will be written into, and become the new top.
1069 1070
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
F
Fam Zheng 已提交
1071 1072
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
1073 1074 1075
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the commit job inserts into the graph above @bs. NULL means that
 * a node name should be autogenerated.
F
Fam Zheng 已提交
1076 1077
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
1078
 * @auto_complete: Auto complete the job.
1079
 * @errp: Error object.
F
Fam Zheng 已提交
1080 1081
 *
 */
1082 1083 1084 1085 1086 1087
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
                              BlockDriverState *base, int creation_flags,
                              int64_t speed, BlockdevOnError on_error,
                              const char *filter_node_name,
                              BlockCompletionFunc *cb, void *opaque,
                              bool auto_complete, Error **errp);
P
Paolo Bonzini 已提交
1088 1089
/*
 * mirror_start:
1090 1091
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
P
Paolo Bonzini 已提交
1092 1093
 * @bs: Block device to operate on.
 * @target: Block device to write to.
1094 1095
 * @replaces: Block graph node name to replace once the mirror is done. Can
 *            only be used when full mirroring is selected.
1096 1097
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
P
Paolo Bonzini 已提交
1098
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
1099
 * @granularity: The chosen granularity for the dirty bitmap.
1100
 * @buf_size: The amount of data that can be in flight at one time.
P
Paolo Bonzini 已提交
1101
 * @mode: Whether to collapse all images in the chain to the target.
M
Max Reitz 已提交
1102
 * @backing_mode: How to establish the target's backing chain after completion.
1103 1104
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
1105
 * @unmap: Whether to unmap target where source sectors only contain zeroes.
1106 1107 1108
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the mirror job inserts into the graph above @bs. NULL means that
 * a node name should be autogenerated.
1109
 * @copy_mode: When to trigger writes to the target.
P
Paolo Bonzini 已提交
1110 1111 1112
 * @errp: Error object.
 *
 * Start a mirroring operation on @bs.  Clusters that are allocated
1113
 * in @bs will be written to @target until the job is cancelled or
P
Paolo Bonzini 已提交
1114 1115 1116
 * manually completed.  At the end of a successful mirroring job,
 * @bs will be switched to read from @target.
 */
1117 1118
void mirror_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, const char *replaces,
1119 1120
                  int creation_flags, int64_t speed,
                  uint32_t granularity, int64_t buf_size,
M
Max Reitz 已提交
1121 1122
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
1123
                  BlockdevOnError on_target_error,
1124 1125
                  bool unmap, const char *filter_node_name,
                  MirrorCopyMode copy_mode, Error **errp);
P
Paolo Bonzini 已提交
1126

1127
/*
1128
 * backup_job_create:
1129 1130
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
1131 1132 1133
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
1134
 * @sync_mode: What parts of the disk image should be copied to the destination.
1135
 * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
1136 1137
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
1138 1139
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
1140 1141
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
1142
 * @txn: Transaction that this job is part of (may be NULL).
1143
 *
1144
 * Create a backup operation on @bs.  Clusters in @bs are written to @target
1145 1146
 * until the job is cancelled or manually completed.
 */
1147 1148 1149 1150 1151 1152 1153 1154 1155
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                            BlockDriverState *target, int64_t speed,
                            MirrorSyncMode sync_mode,
                            BdrvDirtyBitmap *sync_bitmap,
                            bool compress,
                            BlockdevOnError on_source_error,
                            BlockdevOnError on_target_error,
                            int creation_flags,
                            BlockCompletionFunc *cb, void *opaque,
K
Kevin Wolf 已提交
1156
                            JobTxn *txn, Error **errp);
1157

1158 1159
void hmp_drive_add_node(Monitor *mon, const char *optstr);

1160 1161
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                  const char *child_name,
1162
                                  const BdrvChildRole *child_role,
1163
                                  AioContext *ctx,
1164 1165
                                  uint64_t perm, uint64_t shared_perm,
                                  void *opaque, Error **errp);
1166 1167
void bdrv_root_unref_child(BdrvChild *child);

1168 1169 1170
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
                            Error **errp);

1171 1172 1173 1174 1175
/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
 * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
 * all children */
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
                               const BdrvChildRole *role,
1176
                               BlockReopenQueue *reopen_queue,
1177 1178 1179
                               uint64_t perm, uint64_t shared,
                               uint64_t *nperm, uint64_t *nshared);

1180 1181 1182 1183 1184 1185
/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
 * (non-raw) image formats: Like above for bs->backing, but for bs->file it
 * requires WRITE | RESIZE for read-write images, always requires
 * CONSISTENT_READ and doesn't share WRITE. */
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
                               const BdrvChildRole *role,
1186
                               BlockReopenQueue *reopen_queue,
1187 1188
                               uint64_t perm, uint64_t shared,
                               uint64_t *nperm, uint64_t *nshared);
1189

1190
/*
1191
 * Default implementation for drivers to pass bdrv_co_block_status() to
1192 1193
 * their file.
 */
1194 1195 1196 1197 1198 1199 1200
int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
                                                bool want_zero,
                                                int64_t offset,
                                                int64_t bytes,
                                                int64_t *pnum,
                                                int64_t *map,
                                                BlockDriverState **file);
1201
/*
1202
 * Default implementation for drivers to pass bdrv_co_block_status() to
1203 1204
 * their backing file.
 */
1205 1206 1207 1208 1209 1210 1211
int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file);
K
Kevin Wolf 已提交
1212
const char *bdrv_get_parent_name(const BlockDriverState *bs);
1213
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
1214
bool blk_dev_has_removable_media(BlockBackend *blk);
M
Max Reitz 已提交
1215
bool blk_dev_has_tray(BlockBackend *blk);
1216 1217 1218 1219
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);

1220
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
1221

F
Fam Zheng 已提交
1222
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
1223
void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
F
Fam Zheng 已提交
1224

1225 1226 1227
void bdrv_inc_in_flight(BlockDriverState *bs);
void bdrv_dec_in_flight(BlockDriverState *bs);

1228 1229
void blockdev_close_all_bdrv_states(void);

1230 1231
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
                                         BdrvChild *dst, uint64_t dst_offset,
1232 1233 1234
                                         uint64_t bytes,
                                         BdrvRequestFlags read_flags,
                                         BdrvRequestFlags write_flags);
1235 1236
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
                                       BdrvChild *dst, uint64_t dst_offset,
1237 1238 1239
                                       uint64_t bytes,
                                       BdrvRequestFlags read_flags,
                                       BdrvRequestFlags write_flags);
1240

1241 1242
int refresh_total_sectors(BlockDriverState *bs, int64_t hint);

B
bellard 已提交
1243
#endif /* BLOCK_INT_H */