“a2cb15b0ddfa05f81a42d7b65dd0c7c50e420c33”上不存在“include/hw/pci-host/q35.h”
block_int.h 31.5 KB
Newer Older
B
bellard 已提交
1 2
/*
 * QEMU System Emulator block driver
3
 *
B
bellard 已提交
4
 * Copyright (c) 2003 Fabrice Bellard
5
 *
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef BLOCK_INT_H
#define BLOCK_INT_H

27
#include "block/accounting.h"
28
#include "block/block.h"
29 30
#include "qemu/option.h"
#include "qemu/queue.h"
31
#include "qemu/coroutine.h"
32
#include "qemu/timer.h"
L
Luiz Capitulino 已提交
33
#include "qapi-types.h"
34
#include "qemu/hbitmap.h"
35
#include "block/snapshot.h"
36
#include "qemu/main-loop.h"
37
#include "qemu/throttle.h"
P
pbrook 已提交
38

39 40
/* BLOCK_FLAG_* bit values. */
#define BLOCK_FLAG_ENCRYPT          1
#define BLOCK_FLAG_LAZY_REFCOUNTS   8

/*
 * Option name strings.
 * NOTE(review): presumably the keys used in option lists such as
 * BlockDriver.create_opts -- confirm against the drivers that use them.
 */
#define BLOCK_OPT_SIZE              "size"
#define BLOCK_OPT_ENCRYPT           "encryption"
#define BLOCK_OPT_COMPAT6           "compat6"
#define BLOCK_OPT_HWVERSION         "hwversion"
#define BLOCK_OPT_BACKING_FILE      "backing_file"
#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
#define BLOCK_OPT_TABLE_SIZE        "table_size"
#define BLOCK_OPT_PREALLOC          "preallocation"
#define BLOCK_OPT_SUBFMT            "subformat"
#define BLOCK_OPT_COMPAT_LEVEL      "compat"
#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
#define BLOCK_OPT_REDUNDANCY        "redundancy"
#define BLOCK_OPT_NOCOW             "nocow"
#define BLOCK_OPT_OBJECT_SIZE       "object_size"
#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"

/*
 * NOTE(review): presumably the size of the buffer handed to the
 * .bdrv_probe() callbacks -- confirm against the probing code.
 */
#define BLOCK_PROBE_BUF_SIZE        512

62 63 64 65 66 67 68 69
/*
 * Kind of operation recorded in a tracked request (stored in the 'type'
 * member of BdrvTrackedRequest).  The numeric values are spelled out so
 * they stay stable if enumerators are ever inserted.
 */
enum BdrvTrackedRequestType {
    BDRV_TRACKED_READ    = 0,
    BDRV_TRACKED_WRITE   = 1,
    BDRV_TRACKED_FLUSH   = 2,
    BDRV_TRACKED_IOCTL   = 3,
    BDRV_TRACKED_DISCARD = 4,
};

70 71
typedef struct BdrvTrackedRequest {
    BlockDriverState *bs;
72 73
    int64_t offset;
    unsigned int bytes;
74
    enum BdrvTrackedRequestType type;
75

76
    bool serialising;
77 78 79
    int64_t overlap_offset;
    unsigned int overlap_bytes;

80 81 82
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
83 84

    struct BdrvTrackedRequest *waiting_for;
85 86
} BdrvTrackedRequest;

B
bellard 已提交
87 88 89
struct BlockDriver {
    const char *format_name;
    int instance_size;
90

91 92 93 94
    /* set to true if the BlockDriver is a block filter */
    bool is_filter;
    /* for snapshots block filter like Quorum can implement the
     * following recursive callback.
95 96 97
     * It's purpose is to recurse on the filter children while calling
     * bdrv_recurse_is_first_non_filter on them.
     * For a sample implementation look in the future Quorum block filter.
98
     */
99 100
    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
                                             BlockDriverState *candidate);
101

B
bellard 已提交
102
    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
103
    int (*bdrv_probe_device)(const char *filename);
104 105 106

    /* Any driver implementing this callback is expected to be able to handle
     * NULL file names in its .bdrv_open() implementation */
107
    void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
108 109 110 111 112 113
    /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
     * this field set to true, except ones that are defined only by their
     * child's bs.
     * An example of the last type will be the quorum block driver.
     */
    bool bdrv_needs_filename;
114

115 116 117
    /* Set if a driver can support backing files */
    bool supports_backing;

118 119 120 121 122
    /* For handling image reopen for split or non-split files */
    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue, Error **errp);
    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
123
    void (*bdrv_join_options)(QDict *options, QDict *old_options);
124

M
Max Reitz 已提交
125 126 127 128
    int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp);
    int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp);
B
bellard 已提交
129
    void (*bdrv_close)(BlockDriverState *bs);
C
Chunyan Liu 已提交
130
    int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
B
bellard 已提交
131
    int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
132
    int (*bdrv_make_empty)(BlockDriverState *bs);
M
Max Reitz 已提交
133

134
    void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
M
Max Reitz 已提交
135

B
bellard 已提交
136
    /* aio */
137
    BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
138
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
139
        BlockCompletionFunc *cb, void *opaque);
140
    BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
141
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
142
        BlockCompletionFunc *cb, void *opaque);
143
    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
144
        BlockCompletionFunc *cb, void *opaque);
145 146
    BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
        int64_t offset, int count,
147
        BlockCompletionFunc *cb, void *opaque);
B
bellard 已提交
148

K
Kevin Wolf 已提交
149 150
    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
151 152
    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
K
Kevin Wolf 已提交
153 154
    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
155 156
    int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
157 158
    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
159

160 161 162
    /*
     * Efficiently zero a region of the disk image.  Typically an image format
     * would use a compact metadata representation to implement this.  This
163 164
     * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
     * will be called instead.
165
     */
E
Eric Blake 已提交
166 167
    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
        int64_t offset, int count, BdrvRequestFlags flags);
168 169
    int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors);
170 171
    int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
        int64_t offset, int count);
172
    int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
173 174
        int64_t sector_num, int nb_sectors, int *pnum,
        BlockDriverState **file);
K
Kevin Wolf 已提交
175

176 177 178
    /*
     * Invalidate any cached meta-data.
     */
179
    void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp);
180
    int (*bdrv_inactivate)(BlockDriverState *bs);
181

P
Pavel Dovgalyuk 已提交
182 183 184 185 186 187 188
    /*
     * Flushes all data for all layers by calling bdrv_co_flush for underlying
     * layers, if needed. This function is needed for deterministic
     * synchronization of the flush finishing callback.
     */
    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);

189 190 191 192 193 194
    /*
     * Flushes all data that was already written to the OS all the way down to
     * the disk (for example raw-posix calls fsync()).
     */
    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);

K
Kevin Wolf 已提交
195 196 197 198 199 200 201
    /*
     * Flushes all internal caches to the OS. The data may still sit in a
     * writeback cache of the host OS, but it will survive a crash of the qemu
     * process.
     */
    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);

B
bellard 已提交
202 203
    const char *protocol_name;
    int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
204

B
bellard 已提交
205
    int64_t (*bdrv_getlength)(BlockDriverState *bs);
206
    bool has_variable_length;
207
    int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
208

209
    int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
B
bellard 已提交
210 211
                                 const uint8_t *buf, int nb_sectors);

212
    int (*bdrv_snapshot_create)(BlockDriverState *bs,
B
bellard 已提交
213
                                QEMUSnapshotInfo *sn_info);
214
    int (*bdrv_snapshot_goto)(BlockDriverState *bs,
B
bellard 已提交
215
                              const char *snapshot_id);
216 217 218 219
    int (*bdrv_snapshot_delete)(BlockDriverState *bs,
                                const char *snapshot_id,
                                const char *name,
                                Error **errp);
220
    int (*bdrv_snapshot_list)(BlockDriverState *bs,
B
bellard 已提交
221
                              QEMUSnapshotInfo **psn_info);
E
edison 已提交
222
    int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
223 224 225
                                  const char *snapshot_id,
                                  const char *name,
                                  Error **errp);
B
bellard 已提交
226
    int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
M
Max Reitz 已提交
227
    ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
B
bellard 已提交
228

229 230 231 232 233 234
    int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
                                          QEMUIOVector *qiov,
                                          int64_t pos);
    int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
                                          QEMUIOVector *qiov,
                                          int64_t pos);
235

K
Kevin Wolf 已提交
236 237 238
    int (*bdrv_change_backing_file)(BlockDriverState *bs,
        const char *backing_file, const char *backing_fmt);

B
bellard 已提交
239
    /* removable device specific */
240
    bool (*bdrv_is_inserted)(BlockDriverState *bs);
B
bellard 已提交
241
    int (*bdrv_media_changed)(BlockDriverState *bs);
242
    void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
243
    void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
244

245
    /* to control generic scsi devices */
246
    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
247
        unsigned long int req, void *buf,
248
        BlockCompletionFunc *cb, void *opaque);
249

250
    /* List of options for creating images, terminated by name == NULL */
251
    QemuOptsList *create_opts;
252

253 254 255 256
    /*
     * Returns 0 for completed check, -errno for internal errors.
     * The check results are stored in result.
     */
257 258
    int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
        BdrvCheckMode fix);
A
aliguori 已提交
259

260
    int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
261 262
                              BlockDriverAmendStatusCB *status_cb,
                              void *cb_opaque);
M
Max Reitz 已提交
263

264
    void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
K
Kevin Wolf 已提交
265

K
Kevin Wolf 已提交
266 267 268
    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
        const char *tag);
F
Fam Zheng 已提交
269 270
    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
        const char *tag);
K
Kevin Wolf 已提交
271 272 273
    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);

274
    void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
275

K
Kevin Wolf 已提交
276 277 278 279 280
    /*
     * Returns 1 if newly created images are guaranteed to contain only
     * zeros, 0 otherwise.
     */
    int (*bdrv_has_zero_init)(BlockDriverState *bs);
281

282 283 284 285 286 287 288 289 290 291 292 293 294
    /* Remove fd handlers, timers, and other event loop callbacks so the event
     * loop is no longer in use.  Called with no in-flight requests and in
     * depth-first traversal order with parents before child nodes.
     */
    void (*bdrv_detach_aio_context)(BlockDriverState *bs);

    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
     * can be processed again.  Called with no in-flight requests and in
     * depth-first traversal order with child nodes before parent nodes.
     */
    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
                                    AioContext *new_context);

295 296 297 298
    /* io queue for linux-aio */
    void (*bdrv_io_plug)(BlockDriverState *bs);
    void (*bdrv_io_unplug)(BlockDriverState *bs);

299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
    /**
     * Try to get @bs's logical and physical block size.
     * On success, store them in @bsz and return zero.
     * On failure, return negative errno.
     */
    int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
    /**
     * Try to get @bs's geometry (cyls, heads, sectors)
     * On success, store them in @geo and return 0.
     * On failure return -errno.
     * Only drivers that want to override guest geometry implement this
     * callback; see hd_geometry_guess().
     */
    int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);

314 315 316 317 318 319
    /**
     * Drain and stop any internal sources of requests in the driver, and
     * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
     */
    void (*bdrv_drain)(BlockDriverState *bs);

320 321 322 323 324
    void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
                           Error **errp);
    void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
                           Error **errp);

325
    QLIST_ENTRY(BlockDriver) list;
B
bellard 已提交
326 327
};

328
/*
 * I/O limits and alignment hints.  BlockDriverState embeds one of these
 * as its 'bl' member.  A value of 0 means "no limit / no preference"
 * wherever the member comment says so.
 */
typedef struct BlockLimits {
    /* Alignment requirement, in bytes, for offset/length of I/O
     * requests. Must be a power of 2 less than INT_MAX; defaults to
     * 1 for drivers with modern byte interfaces, and to 512
     * otherwise. */
    uint32_t request_alignment;

    /* maximum number of bytes that can be discarded at once (since it
     * is signed, it must be < 2G, if set), should be multiple of
     * pdiscard_alignment, but need not be power of 2. May be 0 if no
     * inherent 32-bit limit */
    int32_t max_pdiscard;

    /* optimal alignment for discard requests in bytes, must be power
     * of 2, less than max_pdiscard if that is set, and multiple of
     * bl.request_alignment. May be 0 if bl.request_alignment is good
     * enough */
    uint32_t pdiscard_alignment;

    /* maximum number of bytes that can be zeroized at once (since it is
     * signed, it must be < 2G, if set), should be multiple of
     * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
    int32_t max_pwrite_zeroes;

    /* optimal alignment for write zeroes requests in bytes, must be
     * power of 2, less than max_pwrite_zeroes if that is set, and
     * multiple of bl.request_alignment. May be 0 if
     * bl.request_alignment is good enough */
    uint32_t pwrite_zeroes_alignment;

    /* optimal transfer length in bytes (must be power of 2, and
     * multiple of bl.request_alignment), or 0 if no preferred size */
    uint32_t opt_transfer;

    /* maximal transfer length in bytes (need not be power of 2, but
     * should be multiple of opt_transfer), or 0 for no 32-bit limit.
     * For now, anything larger than INT_MAX is clamped down. */
    uint32_t max_transfer;

    /* memory alignment, in bytes so that no bounce buffer is needed */
    size_t min_mem_alignment;

    /* memory alignment, in bytes, for bounce buffer */
    size_t opt_mem_alignment;

    /* maximum number of iovec elements */
    int max_iov;
} BlockLimits;

377 378
/* Forward declaration only; the struct body is not defined in this header.
 * Used as the element type of the BlockDriverState.op_blockers lists. */
typedef struct BdrvOpBlocker BdrvOpBlocker;

M
Max Reitz 已提交
379 380 381 382 383
typedef struct BdrvAioNotifier {
    void (*attached_aio_context)(AioContext *new_context, void *opaque);
    void (*detach_aio_context)(void *opaque);

    void *opaque;
384
    bool deleted;
M
Max Reitz 已提交
385 386 387 388

    QLIST_ENTRY(BdrvAioNotifier) list;
} BdrvAioNotifier;

389
struct BdrvChildRole {
390 391
    void (*inherit_options)(int *child_flags, QDict *child_options,
                            int parent_flags, QDict *parent_options);
392

393 394 395
    void (*change_media)(BdrvChild *child, bool load);
    void (*resize)(BdrvChild *child);

396 397 398 399 400
    /* Returns a name that is supposedly more useful for human users than the
     * node name for identifying the node in question (in particular, a BB
     * name), or NULL if the parent can't provide a better name. */
    const char* (*get_name)(BdrvChild *child);

401 402 403 404 405 406 407 408 409 410
    /*
     * If this pair of functions is implemented, the parent doesn't issue new
     * requests after returning from .drained_begin() until .drained_end() is
     * called.
     *
     * Note that this can be nested. If drained_begin() was called twice, new
     * I/O is allowed only after drained_end() was called twice, too.
     */
    void (*drained_begin)(BdrvChild *child);
    void (*drained_end)(BdrvChild *child);
411 412 413 414 415
};

extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;

K
Kevin Wolf 已提交
416
struct BdrvChild {
417
    BlockDriverState *bs;
418
    char *name;
419
    const BdrvChildRole *role;
K
Kevin Wolf 已提交
420
    void *opaque;
421
    QLIST_ENTRY(BdrvChild) next;
K
Kevin Wolf 已提交
422
    QLIST_ENTRY(BdrvChild) next_parent;
K
Kevin Wolf 已提交
423
};
424

425 426 427 428 429 430
/*
 * Note: the function bdrv_append() copies and swaps contents of
 * BlockDriverStates, so if you add new fields to this struct, please
 * inspect bdrv_append() to determine if the new fields need to be
 * copied as well.
 */
B
bellard 已提交
431
struct BlockDriverState {
432 433
    int64_t total_sectors; /* if we are reading a disk image, give its
                              size in sectors */
434
    int open_flags; /* flags used to open the file, re-used for re-open */
435 436 437 438 439 440 441
    bool read_only; /* if true, the media is read only */
    bool encrypted; /* if true, the media is encrypted */
    bool valid_key; /* if true, a valid encryption key has been set */
    bool sg;        /* if true, the device is a /dev/sg* */
    bool probed;    /* if true, format was probed rather than specified */

    int copy_on_read; /* if nonzero, copy read backing sectors into image.
442
                         note this is a reference count */
B
bellard 已提交
443

444 445 446 447 448
    CoQueue flush_queue;            /* Serializing flush queue */
    unsigned int write_gen;         /* Current data generation */
    unsigned int flush_started_gen; /* Generation for which flush has started */
    unsigned int flushed_gen;       /* Flushed write generation */

B
bellard 已提交
449
    BlockDriver *drv; /* NULL means no media */
B
bellard 已提交
450 451
    void *opaque;

452
    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
M
Max Reitz 已提交
453 454 455 456
    /* long-running tasks intended to always use the same AioContext as this
     * BDS may register themselves in this list to be notified of changes
     * regarding this BDS's context */
    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
457
    bool walking_aio_notifiers; /* to make removal during iteration safe */
458

459 460 461
    char filename[PATH_MAX];
    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
                                    this file image */
462
    char backing_format[16]; /* if non-zero and backing_file exists */
B
bellard 已提交
463

M
Max Reitz 已提交
464
    QDict *full_open_options;
465
    char exact_filename[PATH_MAX];
M
Max Reitz 已提交
466

467
    BdrvChild *backing;
K
Kevin Wolf 已提交
468
    BdrvChild *file;
469

470 471 472
    /* Callback before write request is processed */
    NotifierWithReturnList before_write_notifiers;

473 474
    /* number of in-flight serialising requests */
    unsigned int serialising_in_flight;
475

476 477 478
    /* Offset after the highest byte written to */
    uint64_t wr_highest_offset;

479 480 481
    /* I/O Limits */
    BlockLimits bl;

482 483
    /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
    unsigned int supported_write_flags;
E
Eric Blake 已提交
484
    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
485 486
     * BDRV_REQ_MAY_UNMAP) */
    unsigned int supported_zero_flags;
487

488 489 490 491
    /* the following member gives a name to every node on the bs graph. */
    char node_name[32];
    /* element of the list of named nodes building the graph */
    QTAILQ_ENTRY(BlockDriverState) node_list;
492 493
    /* element of the list of all BlockDriverStates (all_bdrv_states) */
    QTAILQ_ENTRY(BlockDriverState) bs_list;
494 495
    /* element of the list of monitor-owned BDS */
    QTAILQ_ENTRY(BlockDriverState) monitor_list;
F
Fam Zheng 已提交
496
    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
497
    int refcnt;
S
Stefan Hajnoczi 已提交
498 499

    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
500

501 502 503
    /* operation blockers */
    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];

504 505
    /* long-running background operation */
    BlockJob *job;
506

507 508 509 510
    /* The node that this node inherited default options from (and a reopen on
     * which can affect this node by changing these defaults). This is always a
     * parent node of this node. */
    BlockDriverState *inherits_from;
511
    QLIST_HEAD(, BdrvChild) children;
K
Kevin Wolf 已提交
512
    QLIST_HEAD(, BdrvChild) parents;
513

514
    QDict *options;
K
Kevin Wolf 已提交
515
    QDict *explicit_options;
516
    BlockdevDetectZeroesOptions detect_zeroes;
517 518 519

    /* The error object in use for blocking operations on backing_hd */
    Error *backing_blocker;
520 521 522 523

    /* threshold limit for writes, in bytes. "High water mark". */
    uint64_t write_threshold_offset;
    NotifierWithReturn write_threshold_notifier;
524

525 526 527 528
    /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */
    unsigned io_plugged;
    unsigned io_plug_disabled;

529
    int quiesce_counter;
B
bellard 已提交
530 531
};

M
Max Reitz 已提交
532 533 534 535 536 537
/*
 * Small bundle of open flags, read-only state and zero-detection mode.
 * NOTE(review): the member names mirror members of BlockDriverState;
 * presumably this keeps those settings on behalf of a BlockBackend root --
 * confirm against the users of this struct.
 */
struct BlockBackendRootState {
    int open_flags;
    bool read_only;
    BlockdevDetectZeroesOptions detect_zeroes;
};

M
Max Reitz 已提交
538 539 540 541 542 543 544 545 546 547 548 549 550 551
/*
 * Selects how the mirror target's backing chain is set up.  Numeric values
 * are written out explicitly; they match the implicit numbering.
 */
typedef enum BlockMirrorBackingMode {
    /*
     * Reuse the existing backing chain from the source for the target.
     * - sync=full: Set backing BDS to NULL.
     * - sync=top:  Use source's backing BDS.
     * - sync=none: Use source as the backing BDS.
     */
    MIRROR_SOURCE_BACKING_CHAIN = 0,

    /* Open the target's backing chain completely anew */
    MIRROR_OPEN_BACKING_CHAIN = 1,

    /* Do not change the target's backing BDS after job completion */
    MIRROR_LEAVE_BACKING_CHAIN = 2,
} BlockMirrorBackingMode;

552 553 554 555 556
/*
 * Return the BlockDriverState referenced by @bs's 'backing' child, or NULL
 * when @bs has no backing child.  @bs itself is dereferenced unconditionally
 * and therefore must not be NULL.
 */
static inline BlockDriverState *backing_bs(BlockDriverState *bs)
{
    if (bs->backing == NULL) {
        return NULL;
    }

    return bs->backing->bs;
}

557 558 559 560 561 562 563

/* Essential block drivers which must always be statically linked into qemu, and
 * which therefore can be accessed without using bdrv_find_format() */
extern BlockDriver bdrv_file;
extern BlockDriver bdrv_raw;
extern BlockDriver bdrv_qcow2;

564 565 566 567 568 569 570 571
/**
 * bdrv_setup_io_funcs:
 *
 * Prepare a #BlockDriver for I/O request processing by populating
 * unimplemented coroutine and AIO interfaces with generic wrapper functions
 * that fall back to implemented interfaces.
 */
void bdrv_setup_io_funcs(BlockDriver *bdrv);
572

573
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
574 575
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
576
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
577 578
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
579

580
int get_tmp_filename(char *filename, int size);
581 582
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename);
583

584

585 586 587 588 589 590 591 592 593
/**
 * bdrv_add_before_write_notifier:
 *
 * Register a callback that is invoked before write requests are processed but
 * after any throttling or waiting for overlapping requests.
 */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier);

594 595 596 597 598
/**
 * bdrv_detach_aio_context:
 *
 * May be called from .bdrv_detach_aio_context() to detach children from the
 * current #AioContext.  This is only needed by block drivers that manage their
 * own children.  Both ->file and ->backing are automatically handled and
 * block drivers should not call this function on them explicitly.
 */
void bdrv_detach_aio_context(BlockDriverState *bs);

/**
 * bdrv_attach_aio_context:
 *
 * May be called from .bdrv_attach_aio_context() to attach children to the new
 * #AioContext.  This is only needed by block drivers that manage their own
 * children.  Both ->file and ->backing are automatically handled and block
 * drivers should not call this function on them explicitly.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context);

M
Max Reitz 已提交
615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
/**
 * bdrv_add_aio_context_notifier:
 *
 * If a long-running job intends to be always run in the same AioContext as a
 * certain BDS, it may use this function to be notified of changes regarding the
 * association of the BDS to an AioContext.
 *
 * attached_aio_context() is called after the target BDS has been attached to a
 * new AioContext; detach_aio_context() is called before the target BDS is being
 * detached from its old AioContext.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque);

/**
 * bdrv_remove_aio_context_notifier:
 *
 * Unsubscribe of change notifications regarding the BDS's AioContext. The
 * parameters given here have to be the same as those given to
 * bdrv_add_aio_context_notifier().
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*aio_context_attached)(AioContext *,
                                                                   void *),
                                      void (*aio_context_detached)(void *),
                                      void *opaque);

643 644 645 646
#ifdef _WIN32
int is_windows_drive(const char *filename);
#endif

P
Paolo Bonzini 已提交
647 648
/**
 * stream_start:
649 650
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
P
Paolo Bonzini 已提交
651 652 653
 * @bs: Block device to operate on.
 * @base: Block device that will become the new base, or %NULL to
 * flatten the whole backing file chain onto @bs.
654 655
 * @backing_file_str: The file name that will be written to @bs as the
 * the new backing file if the job completes. Ignored if @base is %NULL.
656
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
P
Paolo Bonzini 已提交
657
 * @on_error: The action to take upon error.
P
Paolo Bonzini 已提交
658 659
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
660
 * @errp: Error object.
P
Paolo Bonzini 已提交
661 662 663 664 665
 *
 * Start a streaming operation on @bs.  Clusters that are unallocated
 * in @bs, but allocated in any image between @base and @bs (both
 * exclusive) will be written to @bs.  At the end of a successful
 * streaming job, the backing file of @bs will be changed to
666 667
 * @backing_file_str in the written image and to @base in the live
 * BlockDriverState.
P
Paolo Bonzini 已提交
668
 */
669 670 671 672
void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
                  int64_t speed, BlockdevOnError on_error,
                  BlockCompletionFunc *cb, void *opaque, Error **errp);
673

674 675
/**
 * commit_start:
676 677
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
F
Fam Zheng 已提交
678 679 680
 * @bs: Active block device.
 * @top: Top block device to be committed.
 * @base: Block device that will be written into, and become the new top.
681 682 683 684
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
685
 * @backing_file_str: String to use as the backing file in @top's overlay
686 687 688
 * @errp: Error object.
 *
 */
689 690 691 692
void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
                  BlockdevOnError on_error, BlockCompletionFunc *cb,
                  void *opaque, const char *backing_file_str, Error **errp);
F
Fam Zheng 已提交
693 694
/**
 * commit_active_start:
695 696
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
F
Fam Zheng 已提交
697 698 699 700 701 702 703 704 705
 * @bs: Active block device to be committed.
 * @base: Block device that will be written into, and become the new top.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @errp: Error object.
 *
 */
706 707
void commit_active_start(const char *job_id, BlockDriverState *bs,
                         BlockDriverState *base, int64_t speed,
F
Fam Zheng 已提交
708
                         BlockdevOnError on_error,
709
                         BlockCompletionFunc *cb,
F
Fam Zheng 已提交
710
                         void *opaque, Error **errp);
P
Paolo Bonzini 已提交
711 712
/*
 * mirror_start:
713 714
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
P
Paolo Bonzini 已提交
715 716
 * @bs: Block device to operate on.
 * @target: Block device to write to.
717 718
 * @replaces: Block graph node name to replace once the mirror is done. Can
 *            only be used when full mirroring is selected.
P
Paolo Bonzini 已提交
719
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
720
 * @granularity: The chosen granularity for the dirty bitmap.
721
 * @buf_size: The amount of data that can be in flight at one time.
P
Paolo Bonzini 已提交
722
 * @mode: Whether to collapse all images in the chain to the target.
M
Max Reitz 已提交
723
 * @backing_mode: How to establish the target's backing chain after completion.
724 725
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
726
 * @unmap: Whether to unmap target where source sectors only contain zeroes.
P
Paolo Bonzini 已提交
727 728 729 730 731 732 733 734 735
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @errp: Error object.
 *
 * Start a mirroring operation on @bs.  Clusters that are allocated
 * in @bs will be written to @bs until the job is cancelled or
 * manually completed.  At the end of a successful mirroring job,
 * @bs will be switched to read from @target.
 */
736 737
void mirror_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, const char *replaces,
738
                  int64_t speed, uint32_t granularity, int64_t buf_size,
M
Max Reitz 已提交
739 740
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  BlockdevOnError on_source_error,
741
                  BlockdevOnError on_target_error,
742
                  bool unmap,
743
                  BlockCompletionFunc *cb,
P
Paolo Bonzini 已提交
744 745
                  void *opaque, Error **errp);

746 747
/*
 * backup_start:
748 749
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
750 751 752
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
753
 * @sync_mode: What parts of the disk image should be copied to the destination.
754
 * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
755 756 757 758
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
759
 * @txn: Transaction that this job is part of (may be NULL).
760 761 762 763
 *
 * Start a backup operation on @bs.  Clusters in @bs are written to @target
 * until the job is cancelled or manually completed.
 */
764 765 766
void backup_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
767
                  BlockdevOnError on_source_error,
768
                  BlockdevOnError on_target_error,
769
                  BlockCompletionFunc *cb, void *opaque,
770
                  BlockJobTxn *txn, Error **errp);
771

772 773
/*
 * hmp_drive_add_node: takes the monitor @mon and an option string @optstr.
 * NOTE(review): the "hmp_" prefix suggests this is the human-monitor
 * path for adding a block node described by @optstr -- the implementation
 * is not visible from this header; confirm before relying on it.
 */
void hmp_drive_add_node(Monitor *mon, const char *optstr);

774 775
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                  const char *child_name,
776 777
                                  const BdrvChildRole *child_role,
                                  void *opaque);
778 779
/*
 * bdrv_root_unref_child: takes a BdrvChild pointer and returns nothing.
 * NOTE(review): presumably drops the reference held through @child;
 * whether @child remains valid afterwards is not visible here -- confirm.
 */
void bdrv_root_unref_child(BdrvChild *child);

K
Kevin Wolf 已提交
780
/*
 * bdrv_get_parent_name: returns a read-only string for @bs.
 * NOTE(review): presumably the name of a parent of @bs; the lifetime of
 * the returned string and the result when there is no parent are not
 * visible from this header -- confirm against the implementation.
 */
const char *bdrv_get_parent_name(const BlockDriverState *bs);
781 782
/*
 * blk_dev_*: helpers that all operate on a BlockBackend.
 * NOTE(review): judging by their names, these notify or query the device
 * attached to @blk about media changes, tray state, eject requests and
 * medium locking.  Nothing beyond the signatures is visible in this
 * header; confirm the exact semantics against the BlockBackend code.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load);
bool blk_dev_has_removable_media(BlockBackend *blk);
bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);

788
/*
 * bdrv_set_dirty: takes a starting sector @cur_sector and a sector count
 * @nr_sectors for @bs.
 * NOTE(review): presumably marks that sector range dirty in the dirty
 * bitmaps attached to @bs -- confirm against the implementation.
 */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
789
/*
 * bdrv_requests_pending: boolean query on @bs.
 * NOTE(review): presumably true while @bs still has requests in flight;
 * whether related nodes are also considered is not visible here -- confirm.
 */
bool bdrv_requests_pending(BlockDriverState *bs);
790

F
Fam Zheng 已提交
791 792 793
/*
 * Clear/undo pair for a dirty bitmap.  bdrv_clear_dirty_bitmap() takes an
 * HBitmap ** output argument @out; bdrv_undo_clear_dirty_bitmap() takes an
 * HBitmap * input argument @in.
 * NOTE(review): presumably @out receives the pre-clear contents so that
 * they can later be handed back through @in to revert the clear; who owns
 * the HBitmap in between is not visible here -- confirm.
 */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);

794 795
/*
 * blockdev_close_all_bdrv_states: takes no arguments, returns nothing.
 * NOTE(review): presumably closes every BlockDriverState tracked by the
 * blockdev layer; when it may safely be called is not visible here --
 * confirm against the implementation.
 */
void blockdev_close_all_bdrv_states(void);

B
bellard 已提交
796
#endif /* BLOCK_INT_H */